Compare commits

..

569 Commits

Author SHA1 Message Date
Gael Guennebaud
fe0350cf1b bump 2012-02-06 16:39:26 +01:00
Gael Guennebaud
99c694623a fix a dozen of warnings with MSVC, and get rid of some useless throw() 2012-02-06 15:57:51 +01:00
Gael Guennebaud
6ad48c5d92 fix conjugation in packet_lhs 2012-02-05 18:18:38 +01:00
Gael Guennebaud
4ed87c59c7 Update the PARDISO interface to match other sparse solvers.
- Add support for Upper or Lower inputs.
- Add support for sparse RHS
- Remove transposed cases, remove ordering method interface
- Add full access to PARDISO parameters
2012-02-04 14:20:56 +01:00
Gael Guennebaud
1763f86364 add the recent setFromTriplets() feature in the manual 2012-02-04 10:44:07 +01:00
Gael Guennebaud
fe85b7ebc6 fix several const qualifier issues: double ones, meaningless ones, some missing ones, etc.
(note that const qualifiers are set by internal::nested)
2012-02-03 23:18:26 +01:00
Gael Guennebaud
bc7b251cd9 fix compilation errors with ICC 2012-02-03 23:16:52 +01:00
Gael Guennebaud
a594d7ffd7 stop disabling this legitimate warning, recall that in the following the const on FooRef is really meaningless:
typedef Foo& FooRef;
const FooRef foo;
2012-02-03 23:16:11 +01:00
Gael Guennebaud
ad4aa7873f remove unused variables 2012-02-03 13:30:48 +01:00
Gael Guennebaud
fd4aefadcd fix ctest -D Foo with MSVC 2008 2012-02-03 10:50:49 +01:00
Zuiquan
a64407f086 Enable Eigen to compile on 'pure C/C++' Gcc environment (with no inline assembly or asm directive). Required if we want to use Eigen with Adobe Alchemy. 2012-02-02 12:05:02 +01:00
Gael Guennebaud
13abb37721 shutup floating point underflow warning for this specific unit test 2012-01-31 23:18:17 +01:00
Gael Guennebaud
7002639844 the default ctor had no sense because of the const reference member 2012-01-31 23:12:04 +01:00
Gael Guennebaud
13e46ad847 add missing return *this 2012-01-31 23:11:13 +01:00
Gael Guennebaud
9a954d29ec rm non standard and useless overloads of is_arithmetic for long long 2012-01-31 21:45:03 +01:00
Gael Guennebaud
634fedaf68 proper C++ casting 2012-01-31 18:56:25 +01:00
Gael Guennebaud
10cd52350f fix a few warnings: change of sign and missing return statement 2012-01-31 13:05:44 +01:00
Gael Guennebaud
9c86ee2695 fix static inline versus inline static issues (the former is the correct order) 2012-01-31 12:58:52 +01:00
Gael Guennebaud
8d6e394b06 workaround "empty macro argument" warning 2012-01-31 12:46:14 +01:00
Gael Guennebaud
670e3af5a8 add .data() member to Diagonal<> 2012-01-31 12:44:59 +01:00
Gael Guennebaud
18e3ac0f0d fix some compilation errors with ICC and -strict-ansi 2012-01-31 09:14:01 +01:00
Gael Guennebaud
87138075da add the possibility to assemble a SparseMatrix object from a random list of triplets that may contain duplicated elements. It works in linear time, with O(1) re-allocations. 2012-01-28 11:13:59 +01:00
Gael Guennebaud
fc2d85d139 fix memory leak in SuperLUSupport 2012-01-27 10:07:09 +01:00
Gael Guennebaud
27d222d23e honor nested types in dense * sparse 2012-01-27 09:39:36 +01:00
Jitse Niesen
ed244e9c1a Document that JacobiSVD also handles complex matrices.
Thanks to 'Jazzdude' for noting this on IRC.
2012-01-26 13:16:50 +00:00
Gael Guennebaud
0251bb6c1d add missing inline keyword (linking issue) 2012-01-26 10:53:42 +01:00
Gael Guennebaud
65d5311c68 SimplicialCholesky: the shift offset must be real, and fix a comparison issue for complexes 2012-01-26 10:34:45 +01:00
Gael Guennebaud
d9f5840f7a simple compilation fix 2012-01-26 08:52:20 +01:00
Gael Guennebaud
a108216af1 fix bug #410: fix a possible out of range access in EigenSolver 2012-01-25 19:02:31 +01:00
Christoph Hertzberg
362fcabc44 Check for positive definiteness in SimplicialLLT 2012-01-14 22:34:18 +01:00
Gael Guennebaud
5e4dfa4a09 fix a nesting type issue in Sparse/TriangularView 2012-01-25 18:16:48 +01:00
Gael Guennebaud
606e204f6d fix bug #406: Using OpenMP and Eigen causes infinite loop/deadlock
(transplanted from fd52daae87
)
2012-01-25 17:42:22 +01:00
Gael Guennebaud
c68616b3b5 fix warning with gcc 4.6 2012-01-25 15:48:50 +01:00
Gael Guennebaud
87f2af5930 workaround ICC compilation error with -strict-ansi 2012-01-25 15:45:01 +01:00
Gael Guennebaud
d615d39af0 determine windows version from major.minor only, the patch number is irrelevant. 2012-01-23 21:56:46 +01:00
Gael Guennebaud
0d03492e1e std::isfinite is non standard 2012-01-23 21:49:00 +01:00
Gael Guennebaud
ee9f3e34b0 LLT: improve rankUpdate to support downdates,
LDLT: add the missing info() function,
improve unit testing of rankUpdate()
2012-01-23 17:28:23 +01:00
Abraham Bachrach
039408cd66 added functions to allow for cwise min/max operations with scalar argument (bug #400).
added function for array.min(), array.max(), matrix.cwiseMin(), matrix.cwiseMax().

The matrix.cwiseMin/Max functions required the definition of the ConstantReturnType typedef.
However, it wasn't defined until after MatrixCwiseBinaryOps was included in Eigen/src/SparseCore/SparseMatrixBase.h,
so I moved those includes after the definition of the typedefs.

tests for both the regular and scalar min/max functions were added as well
2012-01-11 11:00:30 -05:00
Gael Guennebaud
238999045c optimize the packing of lhs blocks for matrix-matrix products => significant speedup for small products 2012-01-21 19:34:28 +01:00
Jitse Niesen
0e1e0a2a58 Make sure that now-fixed assert is not triggered. 2012-01-19 14:30:44 +00:00
Keir Mierle
274f8a0947 Fix broken asserts revealed by Clang. 2012-01-18 15:03:27 -08:00
Gael Guennebaud
589cc627f8 fix one more VC10 ICE 2012-01-18 17:45:22 +01:00
Gael Guennebaud
db8f528737 fix VC10 ICE 2012-01-18 17:42:13 +01:00
Jitse Niesen
d6bf9f848a Correct description of rankUpdate() in quick reference guide.
Thanks to Sameer Agarwal for pointing out this mistake.
(transplanted from bc0fc5d21e
)
2012-01-09 12:57:11 +00:00
Keir Mierle
2d4fee0b40 Fix out-of-range int constant in 4x4 inverse.
(transplanted from 45bcad41b4
)
2012-01-05 23:15:09 -08:00
Gael Guennebaud
e7ef367db1 suppress unused variable warnings 2012-01-06 09:02:06 +01:00
Gael Guennebaud
bdee0c9baa set the default number of iteration to the size of the problem 2011-12-27 16:38:05 +01:00
Gael Guennebaud
15ea999f84 pushed too fast the previous one 2011-12-23 23:22:31 +01:00
Gael Guennebaud
901bcdd2a8 the previous test works for Dynamic sizes only 2011-12-23 23:16:43 +01:00
Gael Guennebaud
96a18ef230 add a reconstruction test 2011-12-23 23:15:08 +01:00
Gael Guennebaud
8171adb7ff fix bug #398, the quaternion returned by slerp was not always normalized,
add a proper unit test for slerp
2011-12-23 22:39:32 +01:00
Gael Guennebaud
67ae94f3a2 fix compilation of sparse_basic unit test for complexes 2011-12-23 09:41:14 +01:00
Gael Guennebaud
e3e39ea26d suppress an 'unused variable' warning 2011-12-22 14:06:16 +01:00
Gael Guennebaud
2c03e6fccc evaluate 1D sparse expressions into SparseVector and make the sparse operator<< and dot honor nested types 2011-12-22 14:01:06 +01:00
Gael Guennebaud
7f04845023 fix assignment of a row-major sparse vector to a column major sparse one 2011-12-22 11:53:47 +01:00
Gael Guennebaud
e4cea957df fix bug #391: prune was for compressed format only, now it also turns the matrix into compressed form 2011-12-20 18:37:24 +01:00
Gael Guennebaud
7e866c447f fix bug #391: improper stream output for uncompressed mode, also avoid double debugging outputs for column major matrices 2011-12-20 18:31:00 +01:00
Gael Guennebaud
6f92b75874 add aliasing test for sparse*sparse product 2011-12-20 18:10:22 +01:00
Gael Guennebaud
50d756b9ea fix bug #394: innerVector::nonZeros() was broken for uncompressed mode 2011-12-20 18:10:02 +01:00
Gael Guennebaud
15d781b64c we need to define EXTRACT_ALL to YES to get doxygen see the whole hierarchy. Exclude internal::* from the doc. 2011-12-20 10:25:54 +01:00
Gael Guennebaud
fcc966b40d workaround doxygen limitation to follow the base class of PlainObjectBase 2011-12-19 22:13:11 +01:00
Gael Guennebaud
33e52a3943 rm local fill-in ratio estimation (was broken sometimes) 2011-12-16 16:29:46 +01:00
Gael Guennebaud
732a50d043 implement a more optimistic heuristic to predict the nnz of a sparse*sparse product 2011-12-16 15:59:44 +01:00
Gael Guennebaud
40c0f3af57 fix bug #396: add a static assertion on the storage order of a sparse-sparse coeff-wise binary op 2011-12-15 19:23:20 +01:00
Jitse Niesen
3db6455896 Remove evaluators for 2.1 release.
We plan to re-instate them when we branch 2.2 (see bug #388).
2011-12-14 21:23:43 +00:00
Gael Guennebaud
0308c11849 remove a file that was not intended to be committed 2011-12-13 08:42:48 +01:00
Jitse Niesen
1e7712771e Remove asserts that eigenvalue computation has converged (bug #354). 2011-12-12 17:17:38 +00:00
Gael Guennebaud
1aa6c7f122 fix sparse insertion example 2011-12-11 17:18:14 +01:00
Gael Guennebaud
d738bedc5b remove redundant declaration (fix compilation with clang 3.0) 2011-12-11 11:45:03 +01:00
Gael Guennebaud
f60e6f5ee8 s/compressed()/isCompressed() 2011-12-10 23:08:10 +01:00
Gael Guennebaud
594fd2d11d Cholmod: add support for uncompressed SparseMatrix objects 2011-12-10 22:53:31 +01:00
Gael Guennebaud
9d7d634897 add cholmod_support unit tests 2011-12-10 19:32:17 +01:00
Gael Guennebaud
f35708d2e0 enforce weak linking of xerbla 2011-12-10 19:30:36 +01:00
Gael Guennebaud
105e170d8b trivial compilation fix 2011-12-10 16:17:12 +01:00
Gael Guennebaud
2600ba1731 feature 297: s/intersectionPoint/pointAt, fix documentation, add a unit test 2011-12-10 12:17:42 +01:00
Andy Somerville
c06ae325a4 feature 297: add ParametrizedLine::intersectionPoint() and intersectionParam()
-> intersection() is deprecated
2011-12-10 11:58:38 +01:00
Igor Krivenko
36457178f9 bug #352:properly cast constants 2011-12-09 23:38:41 +01:00
Gael Guennebaud
d400a6245e fix compilation with EIGEN_NO_DEBUG 2011-12-09 23:42:39 +01:00
Gael Guennebaud
38277e8a9b feature 319: fix LDLT::rankUpdate for complex/upper, simplify the algorithm, update copyrights 2011-12-09 23:08:38 +01:00
Tim Holy
2d7c3eea53 feature 319: Add update and downdate functionality to LDLT 2011-12-09 21:04:44 +01:00
Gael Guennebaud
37f304a2e6 add a "using MKL" documentation page, add a minimal documentation of PARDISO wrapper classes, refine a bit the EIGEN_USE_* logic 2011-12-09 16:52:37 +01:00
Sebastian Lipponer
fff25a4b46 Fix MSVC integer overflow warning 2011-12-09 10:39:10 +00:00
Gael Guennebaud
57c6bfba08 add missing CMakeLists.txt 2011-12-09 10:53:12 +01:00
Gael Guennebaud
081abb701d add user defined CXX and LINKER flag cmake variables for the unit tests 2011-12-09 10:50:13 +01:00
Gael Guennebaud
10447a7b57 mv blas.h to src/misc such that it would be possible to use any blas libraries,
however, this requires some more works:
 - add const qualifiers in the declarations of blas.h
 - add the possibility to add a suffix to blas function names
2011-12-09 10:40:35 +01:00
Gael Guennebaud
43cdd242d0 - split and rename defined tokens to enable the use of BLAS/Lapack/VML/etc
- include MKL headers outside the Eigen namespace.
2011-12-09 10:06:49 +01:00
karturov
015c331252 Intel(R) MKL support added.
* * *
License disclaimer changed to BSD license for MKL_support.h
* * *
Pardiso support fixed, test added.
blas/lapack tests fixed: Scalar parameter was added in Cholesky, product_matrix_vector_triangular renamed to triangular_matrix_vector_product.
* * *
PARDISO test was added physically.
2011-12-05 14:52:21 +07:00
Gael Guennebaud
e270a5656a fix min/max clash with clang's header by including fstream beforehand 2011-12-08 23:27:10 +01:00
Gael Guennebaud
86bb20c431 remove dead code 2011-12-08 23:22:28 +01:00
Gael Guennebaud
e36a4c880a suppress deprecated warning when compiling legacy tests 2011-12-08 23:15:07 +01:00
Gael Guennebaud
06450882ab add missing CMakeLists.txt in Splines 2011-12-08 23:12:39 +01:00
Jitse Niesen
dd232e30b0 Document QuaternionBase, minor doc improvements.
* Document class QuaternionBase so that docs for members are displayed.
* Remove obsolete \redstar referring to Array module
* Fix typo in Constants.h
* Document EIGEN_NO_AUTOMATIC_RESIZING
2011-12-08 14:22:06 +00:00
Gael Guennebaud
a1fa05f14e improve compiler name and version detection 2011-12-07 13:20:52 +01:00
Gael Guennebaud
a0da96e2f4 fix detection of ICC version 2011-12-06 22:07:20 +01:00
Gael Guennebaud
80f8ed9f9c improve compiler and architecture detection 2011-12-06 19:54:34 +01:00
Thomas Capricelli
c3ad1f9382 eigen_gen_docs: dont try to update permissions on server 2011-12-06 15:55:20 +01:00
Gael Guennebaud
6ec0af6dc7 Added tag 3.1.0-alpha1 for changeset e017f798eb 2011-12-06 15:53:46 +01:00
Gael Guennebaud
e017f798eb bump 2011-12-06 15:53:17 +01:00
Hauke Heibel
accae638b2 Fixed a typo. 2011-12-06 15:42:05 +01:00
Gael Guennebaud
84cf1b5b1d fix QuaternionBase::cast.
It did not work with clang, and I'm unsure how it worked for gcc/msvc since QuaternionBase was introduced
2011-12-05 14:13:59 +01:00
Gael Guennebaud
9ca673daed fix compilation with clang 2011-12-05 12:50:43 +01:00
Gael Guennebaud
dd504d6aae fix bug #223: SparseMatrix::Flags no longer encode triangularness information 2011-12-05 10:17:09 +01:00
Gael Guennebaud
59576014a9 fix bug #373: compilation error with clang 2.9 when exceptions are disabled (cannot reproduce with clang 3.0 or 3.1) 2011-12-05 09:44:25 +01:00
Gael Guennebaud
b60624dc2a fix bug #384: add a static assertion on the Index type which has to be signed 2011-12-04 22:14:53 +01:00
Gael Guennebaud
82f9aa194d fix bug #294: add a diagonal() method to SparseMatrix (const) 2011-12-04 21:49:21 +01:00
Gael Guennebaud
69966e90e1 fix bug #221: remove the dense to SparseVector conversion ctor. 2011-12-04 21:15:46 +01:00
Gael Guennebaud
5dc9650f11 fix bug #281: replace csparse macros by template functions 2011-12-04 19:15:23 +01:00
Hauke Heibel
a8a2bf3b5a Added docs to the spline module. 2011-12-04 18:44:01 +01:00
Gael Guennebaud
9bd902ed9c fix bug #341: trisolve on MappedSparseMatrix 2011-12-04 14:57:43 +01:00
Gael Guennebaud
9353bbac4a fix bug #356: fix TriangularView::InnerIterator for unit diagonals 2011-12-04 14:39:24 +01:00
Gael Guennebaud
32917515df make the accessors to internal sparse storage part of the public API and remove their "_" prefix. 2011-12-04 12:19:26 +01:00
Gael Guennebaud
1cdbae62db add SparseVector::ReverseInnerIterator 2011-12-04 09:56:40 +01:00
Gael Guennebaud
91e392a042 add ReverseInnerIterators to loop over the elements in reverse order,
and partly fix bug #356 (issue in trisolve for upper-column major)
2011-12-03 23:49:37 +01:00
Gael Guennebaud
a09cc5d4c0 fix bug #282: add the possibility to shift the diagonal coefficients via a linear function. 2011-12-03 18:26:08 +01:00
Gael Guennebaud
c861e05181 fix matrix names in the insertion example 2011-12-03 18:14:51 +01:00
Gael Guennebaud
9ae606866c Eigen2Support: import some fixes from the 3.0 branch (MSVC fix) 2011-12-03 17:45:07 +01:00
Gael Guennebaud
950eeab4d7 RandomSetter: turns the matrix into compressed form before the filling 2011-12-03 17:35:21 +01:00
Gael Guennebaud
c0e36516f3 add a command to fix the permission of the uploaded documentation 2011-12-03 11:18:20 +01:00
Gael Guennebaud
3f56de2628 improve sparse manual 2011-12-03 10:26:00 +01:00
Gael Guennebaud
e759086dcd improve documentation of some sparse related classes 2011-12-02 19:02:49 +01:00
Gael Guennebaud
4ca89f32ed Sparse matrix insertion:
- automatically turn a SparseMatrix to uncompressed mode when calling insert(i,j).
 - now coeffRef insert a new element when it does not already exist
2011-12-02 19:00:16 +01:00
Gael Guennebaud
f10bae74e8 - move CompressedStorage and AmbiVector into internal namespace
- remove innerVectorNonZeros(j) => use innerVector(j).nonZeros()
2011-12-02 10:00:24 +01:00
Jitse Niesen
a0bcaa88af Extend tutorial page on broadcasting to reflect recent changes. 2011-12-01 21:16:07 +00:00
Gael Guennebaud
b85bcd91bf remove GSL dependency in the unit tests 2011-12-01 18:17:19 +01:00
Gael Guennebaud
7aaae9d6df remove useless blas reference code 2011-12-01 18:10:12 +01:00
Gael Guennebaud
3a4c78b588 add code for band triangular problems:
- currently available from the BLAS interface only
 - and for vectors only
2011-12-01 18:06:28 +01:00
Gael Guennebaud
9fdb6a2ead output error messages in blas unit tests 2011-12-01 18:04:01 +01:00
Hauke Heibel
b00a33bc70 Integrated spline class and simple spline fitting 2011-11-25 14:53:40 +01:00
Gael Guennebaud
49d652c600 fix assignment from uncompressed 2011-11-30 21:55:54 +01:00
Gael Guennebaud
6b8d6887ac bug fix in SparseSelfAdjointTimeDenseProduct for empty rows or columns 2011-11-30 19:39:20 +01:00
Gael Guennebaud
00d4a360ba bug fix in SparseView::incrementToNonZero 2011-11-30 19:31:11 +01:00
Gael Guennebaud
d1b54ecfa3 add more support for uncompressed mode 2011-11-30 19:24:43 +01:00
Gael Guennebaud
cda397b117 cleaning pass on the sparse modules:
- remove outdated/deprecated code
 - improve a bit the documentation
2011-11-28 16:36:37 +01:00
Gael Guennebaud
2d621d235d fix alignment computation in Block and MapBase such that aligned means aligned on 16 bytes and nothing else
(transplanted from dcb36e3d49
)
2011-11-28 13:43:10 +01:00
Marc Glisse
a2810aa32f bug #383 - another c++11-user-defined-literal fix 2011-11-27 15:27:25 -05:00
Marc Glisse
8107b3da75 bug #383 - EIGEN_ASM_COMMENT broken in C++11
this is due to the new user-defined literals syntax.
2011-11-26 17:55:18 -05:00
Gael Guennebaud
f56316f7ed add two alternative solutions to the problem of fixed size members 2011-11-25 13:46:48 +01:00
Gael Guennebaud
70206ab1e1 draft of the new sparse manual reflecting the new sparse module 2011-11-24 17:32:30 +01:00
Gael Guennebaud
57d1ccb2dc fix compilation of doc (broken by changeset bc6d78982f
- General tightening/testing of vectorwise ops)
2011-11-24 17:30:55 +01:00
Gael Guennebaud
2d4fe54b73 fix CG example 2011-11-24 08:19:13 +01:00
Gael Guennebaud
01b4b6e456 improve accuracy of 3x3 direct eigenvector extraction 2011-11-23 22:43:40 +01:00
Gael Guennebaud
be9b87377f typo 2011-11-23 08:30:10 +01:00
Jitse Niesen
63dcdb65fd Install eigen3.pc in default directory if pkgconfig not found (bug #358). 2011-11-22 17:30:35 +00:00
Benoit Jacob
ffe6d1f901 Alignment fixes:
* Fix AlignedBit computation for Plain Objects
 * use it for the conditional alignment of operator new
 * only overload new in PlainObjectBase, don't overload again in Matrix and Array
2011-11-22 09:04:31 -05:00
Gael Guennebaud
f278a3eaba stop fill pivoting LU only if the pivot is exactly 0 2011-11-22 09:18:54 +01:00
Benoit Jacob
bc6d78982f Bugs 157 and 377 - General tightening/testing of vectorwise ops:
* add lots of static assertions making it very explicit when all these ops
are supposed to work:
** all ops require the rhs vector to go in the right direction
** all ops already require that the lhs and rhs are of the same kind
(matrix vs vector) otherwise we'd have to do complex work
** multiplicative ops (introduced Kibeom's patch) are restricted to arrays, if only because for matrices they could be ambiguous.

* add a new test, vectorwiseop.cpp.

* these compound-assign operators used to be implemented with for loops:

   for(Index j=0; j<subVectors(); ++j)
     subVector(j).array() += other.derived().array();

This didn't seem to be needed; replaced by using expressions like operator+ and operator- did.
2011-11-18 11:10:27 -05:00
Kibeom Kim
de22ad117c bug #157 - Implemented *= /= * / operations for VectorwiseOp (e.g. mat.colwise()) 2011-11-17 17:57:45 -05:00
Jitse Niesen
08c0edae86 Move EIGEN_USING_MATRIX_TYPEDEFS macros to Eigen2Support. 2011-11-16 14:32:50 +00:00
Dennis Schridde
db36e4204f [Geometry/AlignedBox] New typedefs, like for Core/Matrix
Includes 1-4 and dynamic sized boxes for int, float and double type.
Also changes the tests to use these typedefs.
2011-11-09 22:12:28 +01:00
Gael Guennebaud
8fbbbe7521 fix some include paths 2011-11-16 09:27:38 +01:00
Gael Guennebaud
cb2f1944e2 add the new module headers 2011-11-12 15:22:35 +01:00
Gael Guennebaud
53fa851724 move sparse solvers from unsupported/ to main Eigen/ and remove the "not stable yet" warning 2011-11-12 14:11:27 +01:00
Gael Guennebaud
dcb66d6b40 fix ei_add_property 2011-11-12 10:54:16 +01:00
Gael Guennebaud
3e4a68cc60 optimize vectorized reductions by peeling the loop:
- x2 for squaredNorm() on double
 - peeling the loop with a peeling factor of 4 leads to even better perf
   for large vectors (e.g., >64) but it makes more difficult to keep good performance on smaller ones.
2011-11-12 09:19:48 +01:00
Gael Guennebaud
c110abb7d2 fix performance issue with SPMV 2011-11-11 06:04:31 +01:00
Gael Guennebaud
9d82a7e204 merge with hauke/eigen-cdash-improvements branch 2011-11-09 21:19:05 +01:00
Dennis Schridde
3a82aa1133 [Core/Matrix] Fix: Clear the right typedef macro 2011-11-09 12:25:55 +00:00
Gael Guennebaud
fb3aa7220f reimplement abs2 not to use std::norm which is incredibly slow. 2011-11-08 22:42:51 +01:00
Jitse Niesen
45a6bb34c3 Add simple example on how to compute Cholesky decomposition. 2011-11-07 17:14:06 +00:00
Marton Danoczy
f422668d39 Patches to support ARM NEON with Clang 3.0 and LLVM-GCC 2011-11-04 16:37:10 +01:00
Benoit Jacob
1b98b73472 Refactor force-inlining macros and use EIGEN_ALWAYS_INLINE to force inlining of the integer overflow helpers, whose non-inlining caused major performance problems, see the mailing list thread 'Significant perf regression probably due to bug #363 patches' 2011-11-06 16:27:41 -05:00
Benoit Jacob
aa3e420df5 Add test for Matrix(x, y) ctor static assert added in previous changeset 2011-11-06 00:44:04 -04:00
Benoit Jacob
ab3f138b23 In the Matrix constructor taking (rows, cols), statically assert that the types are integer.
The 2D vector ctor taking (x, y) is not concerned.
2011-11-05 23:56:48 -04:00
Gael Guennebaud
478de03bd8 fix a couple of warnings in the unit tests 2011-11-05 23:30:49 +01:00
Gael Guennebaud
cdd3e85060 Automatically produce a tgz archive of the documentation. 2011-11-05 21:59:36 +01:00
Gael Guennebaud
b4d1d4a2e0 completely remove EIGEN_BUILD_BLAS_LAPACK option 2011-11-05 13:26:53 +01:00
Gael Guennebaud
c5ddaf0c87 fix compilation 2011-11-05 10:54:05 +01:00
Gael Guennebaud
1de769d122 remove deprecated assert 2011-11-04 14:42:54 +01:00
Gael Guennebaud
05de3dddca use runtest.sh script iif /bin/bash does exist 2011-11-03 17:37:25 +01:00
Gael Guennebaud
94d87abbdb fix fftw cmake stuff 2011-11-03 15:33:42 +01:00
Jitse Niesen
a594ac3966 Allow for more iterations in SelfAdjointEigenSolver (bug #354).
Add an assert to guard against using eigenvalues that have not converged.
Add call to info() in tutorial example to cover non-convergence.
2011-11-02 14:18:20 +00:00
Gael Guennebaud
57207239f3 Mention that the axis in AngleAxis have to be normalized. 2011-11-01 09:40:51 +01:00
Jan Oberländer
fa7c08a831 bug #365 - Rename B0 in GeneralBlockPanelKernel.h to avoid name clash
with termios.h on POSIX systems.
2011-10-31 10:44:09 -04:00
Benoit Jacob
0cf2a05f3e bug #365 - Add test for non-usage of B0 2011-10-31 10:44:06 -04:00
Benoit Jacob
9df2f5c923 bug #369 - Quaternion alignment is broken
The problem was two-fold:
 * missing aligned operator new
 * Flags were mis-computed, the Aligned constant was misused
2011-10-31 09:23:41 -04:00
Benoit Jacob
0609dbeec6 fix more variable-set-but-not-used warnings on gcc 4.6 2011-10-31 00:51:36 -04:00
Benoit Jacob
6a1caf0351 Fix some unused-variable warnings with GCC 4.6 2011-10-30 23:55:20 -04:00
Adolfo Rodriguez Tsourouksdissian
4477843bdd bug #206 - part 4: Removes heap allocations from JacobiSVD and its preconditioners 2011-10-30 23:55:20 -04:00
Adolfo Rodriguez Tsourouksdissian
5e431779f3 bug #206 - part 3: Reimplement FullPivHouseholderQR<T>::matrixQ() using ReturnByValue 2011-03-08 19:04:31 +01:00
Adolfo Rodriguez Tsourouksdissian
7bf0e8cd82 bug #206 - part 2: For HouseholderSequence objects, added non-allocating versions of evalTo() and applyThisOnTheRight/Left that take additional working vector parameters. 2011-10-30 23:55:16 -04:00
Benoit Jacob
bca18a13ea The most important inline keyword ever? Without it, gcc failed to inline this function, which is called by all matrix constructors... 2011-10-25 20:45:26 -04:00
Gael Guennebaud
d7e70edfb3 remove the MSVC specific blas/lapack option 2011-10-24 13:40:01 +02:00
Gael Guennebaud
e44c19d1cc hopefully this workaround of cmake bug #9220 works for MSVC too 2011-10-24 13:36:49 +02:00
Gael Guennebaud
1ddf88060b update sparse*sparse product: the default is now a conservative algorithm preserving symbolic non zeros. The previous one, with auto pruning of small values, is available by doing: (A*B).pruned() or (A*B).pruned(ref) or (A*B).pruned(ref,eps) 2011-10-24 11:44:53 +02:00
Gael Guennebaud
a997dacc67 mark deprecated sparse solvers as so. 2011-10-24 09:51:02 +02:00
Gael Guennebaud
39d4585bff add the possibility to disable deprecated warnings (useful for deprecated unit tests!) 2011-10-24 09:40:37 +02:00
Gael Guennebaud
5d43b4049d factorize solving with guess 2011-10-24 09:33:24 +02:00
Gael Guennebaud
70df09b76d move DynamicSparseMatrix to SparseExtra 2011-10-24 09:31:33 +02:00
Gael Guennebaud
a2d414f568 move the blas.h header to blas/ and remove declaration of function returning a complex 2011-10-19 16:29:43 +02:00
Benoit Jacob
de69129f56 forgot inline keyword 2011-10-17 08:49:59 -04:00
Benoit Jacob
16b638c159 Throw std::bad_alloc even when exceptions are disabled, by doing new int[size_t(-1)].
Don't throw exceptions on aligned_malloc(0) (just because malloc's retval is null doesn't mean error, if size==0).
Remove EIGEN_NO_EXCEPTIONS option, use only compiler standard defines. Either exceptions are enabled or they aren't.
2011-10-17 08:44:44 -04:00
Benoit Jacob
dcbc985a28 bug #363 - add test for integer overflow in size computations 2011-10-16 16:12:19 -04:00
Benoit Jacob
739559b08a bug #363 - check for integer overflow in size=rows*cols computations 2011-10-16 16:12:19 -04:00
Benoit Jacob
0c6055c285 bug #363 - check for integer overflow in byte-size computations 2011-10-16 16:12:19 -04:00
Gael Guennebaud
c1170d2e93 update the decomposition catalogue 2011-10-14 21:21:38 +02:00
Gael Guennebaud
3fce43a704 add a basic ILU preconditioner 2011-10-11 20:41:43 +02:00
Gael Guennebaud
a5761d6dd7 fix sparse tri-solve for full matrices 2011-10-11 20:35:52 +02:00
Gael Guennebaud
15cb4f5b09 extend BiCGSTAB to arbitrary rhs 2011-10-11 19:53:18 +02:00
Gael Guennebaud
21d27c6f71 add proper bicgstab unit test 2011-10-11 19:38:36 +02:00
Gael Guennebaud
cd3c2451b6 add a unit test for permutation applied to sparse objects 2011-10-11 13:45:27 +02:00
Gael Guennebaud
3172749f32 refactor sparse solving unit tests 2011-10-11 11:32:26 +02:00
Gael Guennebaud
4f237f035c extend SimplicialCholesky for sparse rhs, and add determinant 2011-10-11 11:31:12 +02:00
Gael Guennebaud
5dc8458293 extend CG for multiple right hand sides 2011-10-11 11:29:50 +02:00
Gael Guennebaud
b94c00226f make it compatible with Diagonal<> 2011-10-11 11:28:13 +02:00
Gael Guennebaud
ae9c96a32d fix assignment to a set of sparse inner vectors 2011-10-10 16:16:37 +02:00
Gael Guennebaud
4e7f38ffc7 fix nesting 2011-10-09 22:19:01 +02:00
Gael Guennebaud
e97879857b DiagonalPrecond: fix potential segfault in case the diagonal contains explicit zeros 2011-10-09 22:17:37 +02:00
Gael Guennebaud
1beb8a6564 add a generic unit test for sparse SPD problems 2011-10-09 21:50:02 +02:00
Gael Guennebaud
2fc1b58cd2 split SimplicialCholesky into SimplicialLLt and SimplicialLDLt classes and add specific factor access functions 2011-10-09 21:45:55 +02:00
Hauke Heibel
e1dec359ba Configured unsupported/test/mpreal/*.* as CRLF files. 2011-10-04 11:57:49 +02:00
Hauke Heibel
b96d0bd240 Added a flag to build blas/lapack. 2011-10-04 11:23:55 +02:00
Gael Guennebaud
683ea3c93f fix superLU when the solver is called multiple times 2011-09-27 18:30:53 +02:00
Jitse Niesen
ac3ad9c1e7 Convert tabs to spaces. 2011-09-27 15:47:04 +01:00
Jitse Niesen
17c321617d Fix bug #286: Infinite loop in JacobiSVD with denormals 2011-09-27 14:25:02 +01:00
Bram de Jong
961a825b97 Add method which returns worst time (and make some methods const). 2011-09-26 14:39:23 +01:00
Gael Guennebaud
9bba0e7ba1 clean sparse LU tests 2011-09-24 17:15:37 +02:00
Gael Guennebaud
b2988375e8 fix a couple of issues in SuperLU support (memory and determinant) 2011-09-24 14:20:31 +02:00
Gael Guennebaud
6799fabba9 port umfpack support to new API 2011-09-24 14:19:39 +02:00
Gael Guennebaud
d8ae978b65 fix some compilation issues 2011-09-23 16:28:26 +02:00
Gael Guennebaud
823b2105b6 fix atan2 when tmp4==0 2011-09-22 17:34:25 +02:00
Gael Guennebaud
b0adbfbae7 BiCGSTAB does not like starting from 0... 2011-09-21 18:08:08 +02:00
Gael Guennebaud
c331c092d5 no comment 2011-09-21 14:20:41 +02:00
Gael Guennebaud
7301f4345c quick workaround of MSVC9' ICE in pset1 2011-09-21 14:18:41 +02:00
Gael Guennebaud
83563dee3c find macport' umfpack/cholmod 2011-09-21 10:28:09 +02:00
Gael Guennebaud
ebfed5a512 Enable incomplete BLAS/Lapack builds when no fortran compiler has been found.
Works here with gcc. Hopefully this will work for MSVC too.
2011-09-21 10:27:38 +02:00
Gael Guennebaud
1d796acb05 fix status after initialization 2011-09-20 18:45:50 +02:00
Gael Guennebaud
5d1836b182 accept both STL and Eigen's containers for reserve() 2011-09-20 02:04:03 +02:00
Jitse Niesen
e0a6ce50dd Typo in geometry tutorial. 2011-09-19 21:57:26 +01:00
Jitse Niesen
2092b45d0d Bug fix for matrix1 * matrix2 * scalar1 * scalar2.
See report on http://forum.kde.org/viewtopic.php?f=74&t=96947 .
2011-09-19 15:07:19 +01:00
Chen-Pang He
16b13596a6 mainly enhance MatrixLogarithm's performance for RealScalar != double 2011-09-17 21:00:55 +08:00
Gael Guennebaud
edf4c4b217 add support for macosx 2011-09-17 10:57:27 +02:00
Gael Guennebaud
9053729d68 add a bi conjugate gradient stabilized solver 2011-09-17 10:54:14 +02:00
Gael Guennebaud
f4122e9f94 add tan, acos, asin 2011-09-14 08:35:54 +02:00
Jitse Niesen
6b006772f1 Fix LDLT::solve() if matrix singular but solution exists (bug #241).
Clarify this in docs and add regression test.
2011-09-11 06:30:53 +01:00
Jitse Niesen
59b83c14fd Write page on template and typename keywords in C++.
After yet another question on the forum, I decided to write something on this
common issue. Now we just need to link to this and get people to read it.
Thanks to mattb on the forum for some links. Caveat: IANALL (I am not a
language lawyer).
2011-09-10 09:18:18 +01:00
Gael Guennebaud
3e7aaadb1d fix bench_gemm 2011-09-09 10:36:20 +02:00
Gael Guennebaud
d52d8e4a53 reactivate the sorting in the experimental sparse-sparse product 2011-09-08 13:43:32 +02:00
Gael Guennebaud
7706bafcfd add the possibility to reserve room for inner vector in SparseMatrix 2011-09-08 13:42:54 +02:00
Jitse Niesen
7898281b2b Put docs for unsupported modules in right place.
Doxygen was confused by the unsupported modules being partly in the doc/
directly, instead of completely in unsupported/doc/ . Thus, the link to
the unsupported modules on the server did not work (I think this manifested
itself after doxygen was upgraded on the server).
2011-09-07 04:19:12 +01:00
Jitse Niesen
b38d3b360e Define log2() on FreeBSD (fixes bug #343). 2011-09-06 06:52:04 +01:00
Gael Guennebaud
f1d98aad1b add atan2 support in AutoDiff and remove superfluous std:: specializations 2011-09-05 17:47:58 +02:00
Gael Guennebaud
063042bca3 Merged in trevorw/eigen (pull request PR-7) 2011-09-05 10:55:49 +02:00
Jitse Niesen
477d3e5726 Update docs of PlainObjectBase::Map(); fixes bug #335.
Also fix some typos.
2011-09-03 15:18:21 +01:00
Jitse Niesen
a2feb6f3c7 Add defensive assert to MatrixExponential, 2011-09-03 04:58:06 +01:00
Chen-Pang He
dd598ef8ce enhance efficacy via avoiding exception handling 2011-09-02 00:15:02 +08:00
Trevor Wennblom
6b31aa4bd1 resolve pkgconfig destination - #338 2011-08-30 19:15:16 -05:00
Jitse Niesen
7ee084f82f Leverage triangular square root in matrix log. 2011-08-25 07:42:32 +01:00
Jitse Niesen
c01ed935dd Split code for (quasi)triangular matrices from MatrixSquareRoot.
This way, (quasi)triangular matrices can avoid the costly Schur decomposition.
2011-08-25 07:42:21 +01:00
Chen-Pang He
8ddd1e390b fix: <ctime> is necessary for srand(time(NULL)) 2011-08-24 18:26:38 +08:00
Gael Guennebaud
8414be739b fix bug #330: Index to int conversion warning 2011-08-23 11:02:10 +02:00
Gael Guennebaud
b3f5fbbd9a oops EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION now performs full specialization,
no need for the typename keywords
2011-08-22 10:48:04 +02:00
Gael Guennebaud
b85c89c313 fix bug #262: Compilation error of stdvector_overload test with GCC 4.6
Now our aligned allocator is automatically activated only when the user
did not specify an allocator (or specified the default std::allocator).
2011-08-22 10:12:10 +02:00
Jitse Niesen
9bf4d709e4 Fix failures in redux test caused by underflow in .prod() test. 2011-08-21 00:51:15 +01:00
Jitse Niesen
9e667e28f5 Add coverage for long double to matrix_exponential test. 2011-08-21 00:20:29 +01:00
Chen-Pang He
6d7a32231d add compatibility with long double 2011-08-20 12:33:51 +08:00
Gael Guennebaud
ea4a1960f0 mv the mpreal copy in its own folder 2011-08-19 15:08:29 +02:00
Gael Guennebaud
79ad55a901 update to latest mpreal and fix a min/max issue in mpreal.h 2011-08-19 15:03:45 +02:00
Gael Guennebaud
42e2578ef9 the min/max macros to detect unprotected min/max were undefined by some std header,
so let's declare them after and do the respective fixes ;)
2011-08-19 14:18:05 +02:00
Gael Guennebaud
5734ee6df4 add the possibility to specialize assign_impl and still call the default implementations.
(yes I know this change will be deprecated as soon as the evaluators will be in shape but I need this now)
2011-08-18 10:19:25 +02:00
Gael Guennebaud
ca7d3dca79 fix linking issue 2011-08-12 22:38:53 +02:00
Gael Guennebaud
f162f7c323 fix a numerical issue in the direct 3x3 eigenvector extraction 2011-08-08 10:46:26 +02:00
Thomas Capricelli
a660e6425c fix a bug where some rotations were not initialized
They actually were in the original minpack code, this is a bug introduced
by our migration.
Reported on #322 and
http://forum.kde.org/viewtopic.php?f=74&t=96197#p201158
2011-08-04 05:02:04 +02:00
Thomas Capricelli
5748d3c96f wa2 was computed twice because of a confusion between changesets
746c787a76
 and ee0e39284c
.
Reported on forum:
http://forum.kde.org/viewtopic.php?f=74&t=96197#p201158
2011-08-04 03:27:01 +02:00
Jitse Niesen
b12522f696 Remove unnecessary template keywords (breaks compilation under MSVC).
Thanks to Hauke for finding this.
2011-07-28 13:55:56 +01:00
Hauke Heibel
3431c052c6 Improved compilation errors for Transform initialization/assignment with different numeric types. 2011-07-28 09:35:17 +02:00
Gael Guennebaud
3a2cabc275 compilation fix with conjugate_gradient_solve_retval_with_guess 2011-07-26 14:43:20 +02:00
Gael Guennebaud
51f706b916 add the possibility to configure the preconditioner 2011-07-26 09:22:18 +02:00
Gael Guennebaud
66fa6f39a2 add a naive IdentityPreconditioner 2011-07-26 09:17:18 +02:00
Gael Guennebaud
80b1d1371d add a conjugate gradient solver 2011-07-26 09:04:10 +02:00
Gael Guennebaud
8fa7e92e77 fix sparse selfadjoint time dense such that the other triangular part is not used at all 2011-07-26 09:02:41 +02:00
Gael Guennebaud
97ac0fd192 fix eigen2 support min/max garbage 2011-07-22 11:37:41 +02:00
Gael Guennebaud
e8313364c1 simplify a bit the 2x2 direct eigenvalue solver 2011-07-22 11:21:43 +02:00
Gael Guennebaud
47a2bca89f integrate Hauke's 2x2 direct symmetric eigenvalues solver 2011-07-22 09:43:14 +02:00
Gael Guennebaud
26d7dad138 add a computeDirect method to SelfAdjointEigenSolver for fast eigen decomposition 2011-07-21 19:07:52 +02:00
Gael Guennebaud
22bff949c8 protect calls to min and max with parentheses to make Eigen compatible with default windows.h
(transplanted from 49b6e9143e
)
2011-07-21 11:19:36 +02:00
Gael Guennebaud
d4bd8bddb5 fix bug #320 (pretty gdb printer on mingw) 2011-07-20 11:15:42 +02:00
Hauke Heibel
705023fd85 Translation * RotationBase now returns an isometric transformation. 2011-07-19 11:13:40 +02:00
Gael Guennebaud
3fb65734ab fix triangular unit test: it only accepts small matrices 2011-07-19 10:45:42 +02:00
Gael Guennebaud
22cc2b727b fix trmv unit test 2011-07-19 10:44:44 +02:00
Gael Guennebaud
38a4e3053d fix LLT rank one update for "upper" hermitian matrices 2011-07-19 10:09:43 +02:00
Gael Guennebaud
0d02182ae8 add an "InvalidInput" enum, used by the SuperLU interface 2011-07-18 13:37:41 +02:00
Gael Guennebaud
a8f66fec65 add the possibility to configure the maximal matrix size in the unit tests 2011-07-12 14:41:00 +02:00
Gael Guennebaud
bdb545ce3b enable installation of blas and lapack libs 2011-07-11 17:02:09 +02:00
Gael Guennebaud
5fdebc2fa5 fix bug #316 - SelfAdjointEigenSolver::compute does not handle matrices of size (1,1) correctly 2011-07-09 07:15:14 +02:00
Thomas Capricelli
08074843ac fix few warnings reported by clang 2011-07-07 22:20:04 +02:00
Gael Guennebaud
c52268c649 suppress polluting EMPTY macro defined by SuperLU 2011-07-07 16:42:51 +02:00
Gael Guennebaud
2489c81562 add new interface to SuperLU 2011-07-07 14:19:42 +02:00
Gael Guennebaud
c98cd5e564 fix constness of intersection methods (bug #309) 2011-06-27 13:15:01 +02:00
Jitse Niesen
0b308e79c4 Add DenseStorage specializations for dynamic size with MaxSize = 0 (bug #288).
This is necessary for instantiations like Matrix<float,Dynamic,Dynamic,0,0,0>.
2011-06-24 13:47:11 +01:00
Jitse Niesen
16db255333 Fix compilation of cholesky rank update test. 2011-06-24 13:41:23 +01:00
Thomas Capricelli
9b52fe0432 fix typo in doc for ParametrizedLine 2011-06-23 00:36:24 +02:00
Gael Guennebaud
3ecf7e8f6e add a KroneckerProduct module (unsupported) from Kolja Brix and Andreas Platen materials. 2011-06-22 14:39:11 +02:00
Gael Guennebaud
7aabce7c76 rm confusing sentence 2011-06-17 09:46:05 +02:00
Tim Holy
16a2d896bc Relatively straightforward changes to wording of documentation, focusing particularly on the sparse and (to a lesser extent) geometry pages. 2011-06-20 22:47:58 -05:00
Tim Holy
4a95badf74 A first tiny test commit: fix a spelling error in the documentation. 2011-06-19 14:39:19 -05:00
Gael Guennebaud
2f32e48517 New feature: add rank one update in Cholesky decomposition 2011-06-20 15:05:50 +02:00
Gael Guennebaud
a55c27a15f fix documentation of norm 2011-06-18 08:30:34 +02:00
Zach Ploskey
642d452921 Suggest placing Eigen directory in system include path. 2011-06-17 15:46:50 -07:00
Zach Ploskey
e3491beb48 Fixed a few typos and cleaned up some language. 2011-06-17 15:42:15 -07:00
Benoit Jacob
a871f3cdb8 adapt test to the change reverting normalize() to returning void 2011-06-15 10:00:43 -04:00
Benoit Jacob
aedccbf52f back out 842881cfb1 2011-06-15 09:59:10 -04:00
Benoit Jacob
d2673d89bd add test for normalize() and normalized() 2011-06-15 00:30:46 -04:00
Andy Somerville
842881cfb1 bug #298 - let normalize() return a reference to *this 2011-06-15 00:30:11 -04:00
Gael Guennebaud
40287d2fd9 remove the use of non standard long long 2011-06-14 10:56:47 +02:00
Gael Guennebaud
f82b3ea241 fix aligned_allocator::allocate interface 2011-06-14 08:50:25 +02:00
Thomas Capricelli
cf04a7c682 fix typo in constant name 2011-06-12 23:54:28 +02:00
Gael Guennebaud
6d3dee1b66 introduce a smart_copy internal function and fix sparse matrices with non POD scalar type 2011-06-09 19:04:06 +02:00
Jitse Niesen
8c8ab9ae10 Implement matrix logarithm + test + docs.
Currently, test matrix_function_1 fails due to bug #288.
2011-06-07 14:44:43 +01:00
Jitse Niesen
a6d42e28fe Decouple MatrixFunction and MatrixFunctionAtomic
in preparation for implementation of matrix log.
2011-06-07 14:40:27 +01:00
Jitse Niesen
86ca35ccff Fix and test MatrixSquareRoot for 1-by-1 matrices. 2011-06-07 14:32:16 +01:00
Gael Guennebaud
91fe1507d1 Sparse: more fixes regarding long int as index type 2011-06-07 11:28:16 +02:00
Gael Guennebaud
421ece38e1 Sparse: fix long int as index type in simplicial cholesky and other decompositions 2011-06-06 10:17:28 +02:00
Jitse Niesen
7a61a564ef Fix snippets for operator|| and && by adding pair of parens. 2011-06-03 11:17:08 +01:00
Gael Guennebaud
5bc4abc45e fix compilation with MinGW 2011-06-01 12:16:21 +02:00
Gael Guennebaud
562d3ea91d forgot to include this file in previous commit 2011-06-01 10:49:36 +02:00
Gael Guennebaud
35c1158ee3 add boolean || and && operators 2011-05-31 22:17:34 +02:00
Gael Guennebaud
b495203310 update URL 2011-05-31 19:07:15 +02:00
Gael Guennebaud
5830f90983 add read/write routines for sparse matrices in the Market format 2011-05-31 18:58:04 +02:00
Jitse Niesen
9d6fdbced7 Fix truncated instructions for printers.py
... as noted by kp0987 on forum
2011-05-30 16:15:11 +01:00
Gael Guennebaud
5b71d44e18 fix bug #278: geometry tutorial
(transplanted from 3cd1641dac
)
2011-05-28 22:12:15 +02:00
Gael Guennebaud
9464745385 do not directly call std::ceil 2011-05-28 16:46:38 +02:00
Gael Guennebaud
7b46d7ed0f finish to fix bug #270: we have to use EIGEN_ALIGN_STATICALLY and not EIGEN_DONT_ALIGN_STATICALLY... 2011-05-28 11:38:53 +02:00
Jitse Niesen
d23845c4cc Fix typo ('using namespace' instead of 'using'). 2011-05-26 09:52:36 +01:00
Gael Guennebaud
87ac09daa8 Simplify the use of custom scalar types, the rule is to never directly call a standard math function using std:: but rather put a using std::foo before and simply call foo:
using std::max;
max(a,b);
2011-05-25 08:41:45 +02:00
Gael Guennebaud
5541bcb769 bug #225: add a unit test for memory leak 2011-05-23 14:20:49 +02:00
Gael Guennebaud
117d17ee58 bug #271: fix copy/paste mistakes in doc
(transplanted from 145b9cad63101ee46924d446fa8b08ffb48c7f3a)
2011-05-23 13:39:26 +02:00
Gael Guennebaud
46bee5682f clean a bit previous patch (ctor vs static_cast and a few bits) 2011-05-23 13:34:04 +02:00
David H. Bailey
074b067624 fix implicit scalar conversions (needed to support fancy scalar types, see bug #276) 2011-05-23 11:20:13 +02:00
Gael Guennebaud
7209d6a126 fix gemv_static_vector_if on architectures that cannot align on the stack (e.g., ARM NEON) 2011-05-21 22:15:11 +02:00
Gael Guennebaud
96464f8563 clean several other assertion checking tests 2011-05-20 09:59:15 +02:00
Gael Guennebaud
501bc602ec fix vectorization_logic when EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 2011-05-19 21:52:40 +02:00
Gael Guennebaud
f2837aebc4 NEON: fix plset 2011-05-18 21:12:08 +02:00
Gael Guennebaud
8170ef0b2d add unit test for plset 2011-05-18 21:11:03 +02:00
Gael Guennebaud
7f2a88c91f NEON: disable unaligned assertion checking for non vectorized types 2011-05-18 14:11:40 +02:00
Gael Guennebaud
85c137ccd4 NEON: fix ploaddup 2011-05-18 08:15:47 +02:00
Gael Guennebaud
179d42bb2b fix bug #267: alloca is not aligned on arm 2011-05-17 21:30:12 +02:00
Gael Guennebaud
d4fd298fbb Autodiff: fix scalr - active_scalar 2011-05-14 22:38:41 +02:00
Jitse Niesen
9a06055870 Store light-weight objects in evaluators by value.
This resolves failure in unit test caused by dying temporaries.
2011-05-13 14:05:59 +01:00
Gael Guennebaud
a34a216e82 AutoDiff: add one missing operator- version 2011-05-12 23:40:19 +02:00
Gael Guennebaud
3de2f4b75a AutoDiff: fix most of bug #234 (missing operators, used old internal math function interface, etc) 2011-05-12 23:36:33 +02:00
Gael Guennebaud
ae3b6cc324 AutoDiff: fix unary operator- 2011-05-12 22:27:51 +02:00
Jitse Niesen
e22a523021 Remove Eigen::internal::sqrt(), see bug #264. 2011-05-12 16:52:56 +01:00
John Tytgat
0aa7425f15 fix bug #260: broken Qt support for Transform
(transplanted from 84c8b6d5c5
)
2011-05-11 22:31:36 +02:00
Jitse Niesen
0c463a21c4 Forgot to 'hg add' example file in last commit. 2011-05-10 09:59:58 +01:00
Jitse Niesen
d7e3c949be Implement and document MatrixBase::sqrt(). 2011-05-09 22:20:20 +01:00
Jitse Niesen
dac4bb640a Fix compilation error under GCC 4.5.
That version is stricter in forcing function prototype and definition
to match.
2011-05-09 13:57:06 +01:00
Jitse Niesen
837db08cbd Add test for sqrt() on complex Arrays.
From Gael's dashboard output of matrix_square_root test, I suspect the
test committed here may fail on old gcc.
2011-05-09 10:17:41 +01:00
Jitse Niesen
6e1573f66a Implement square root for real matrices via Schur. 2011-05-08 22:18:37 +01:00
Jitse Niesen
6b4e215710 Implement matrix square root for complex matrices.
I hope to implement the real case soon, but it's a bit more
complicated due to the 2-by-2 blocks in the real Schur decomposition.
2011-05-07 22:57:46 +01:00
Jitse Niesen
0896c6d97d Get rid of wrong "subscript above bounds" warning (bug #149). 2011-05-07 18:44:11 +01:00
Gael Guennebaud
4e7e5d09e1 s/n=n/EIGEN_UNUSED_VARIABLE(n) 2011-05-06 21:29:19 +02:00
Gael Guennebaud
fb76452cbc add missing .data() members to MatrixWrapper and ArrayWrapper 2011-05-06 21:15:05 +02:00
Gael Guennebaud
97b6d26f5b fix compilation on ARM NEON (missing AlignedOnScalar) 2011-05-06 09:03:48 +02:00
Thomas Capricelli
883219041f better fix for gcc 4.6.0 / ptrdiff_t, as suggested by Benoit 2011-05-05 18:48:18 +02:00
Thomas Capricelli
a18a1be42d Fix compilation with gcc-4.6.0, patch provided by Anton Gladky <gladky.anton@gmail.com>,
working on debian packaging.
2011-05-05 00:44:24 +02:00
Jitse Niesen
012419166e Bail out if preprocessor symbol Success is defined (bug #253). 2011-05-04 14:28:45 +01:00
Jitse Niesen
781e75cbd7 Document some more preprocessor symbols:
EIGEN_NO_MALLOC, EIGEN_RUNTIME_NO_MALLOC, eigen_assert.
2011-05-04 14:13:20 +01:00
Jitse Niesen
cc23b0a3d9 Remove unused enums in Constants.h . 2011-05-03 17:20:54 +01:00
Jitse Niesen
a96c849c20 Document enums in Constants.h (bug #248).
To get the links to work, I also had to document the Eigen namespace.
Unfortunately, this means that the word Eigen is linked whenever it appears
in the docs.
2011-05-03 17:08:14 +01:00
Gael Guennebaud
1947da39ab fix bug #258: asin/acos copy paste mistake 2011-05-02 13:26:44 +02:00
Hauke Heibel
10426b7647 Final working fix for the EOL extension.
MSVC debugger tools are now forced to CRLF.
2011-04-30 18:10:17 +02:00
Hauke Heibel
0358a8247c This should fix the eol extension. 2011-04-30 17:46:40 +02:00
Hauke Heibel
9e0c8549ce Fixed Unix script line ending conversions. 2011-04-30 17:35:51 +02:00
Jitse Niesen
06fb7cf470 Implement compound assignments using evaluator of SelfCwiseBinaryOp. 2011-04-28 16:57:35 +01:00
Jitse Niesen
3b60d2dbc4 Implement swap using evaluators. 2011-04-28 15:52:15 +01:00
Jitse Niesen
2d11041e24 Use copyCoeff/copyPacket in copy_using_evaluator. 2011-04-22 22:36:45 +01:00
Jitse Niesen
3457965bf5 Implement evaluator for Diagonal. 2011-04-22 22:36:45 +01:00
Jitse Niesen
f924722f3b Implement evaluators for Reverse. 2011-04-22 22:36:45 +01:00
Jitse Niesen
bb2d70d211 Implement evaluators for ArrayWrapper and MatrixWrapper. 2011-04-22 22:36:45 +01:00
Gael Guennebaud
6441e8727b fix aligned_stack_memory_handler for null pointers 2011-04-21 09:00:55 +02:00
Mathieu Gautier
392eb9fee8 Quaternion : add Flags on Quaternion's traits with the LvalueBit set if needed
Quaternion : change PacketAccess to IsAligned to mimic other traits
test : add a test and 4 failtest on Map<const Quaternion> based on Eigen::Map ones
2011-04-12 14:49:50 +02:00
Gael Guennebaud
f85db18c1c I doubt this change was intended to be committed
ss: Enter commit message.  Lines beginning with 'HG:' are removed.
2011-04-20 08:15:09 +02:00
Thomas Capricelli
50c00d14c8 be nice with the server : dont use -j3 2011-04-19 17:41:59 +02:00
Gael Guennebaud
e87f653924 fix bug #250: compilation error with gcc 4.6 (STL header files no longer include cstddef) 2011-04-19 16:34:25 +02:00
Gael Guennebaud
67d50f539b fix bug #242: vectorization was wrongly enabled on MSVC 2005 2011-04-19 15:25:00 +02:00
Eamon Nerbonne
e48bc0dfe3 WIN32 isn't defined ?? but _WIN32 is. 2011-04-19 14:37:04 +02:00
Jitse Niesen
0b40b36d10 Make MapBase(PointerType) constructor explicit (fixes bug #251) 2011-04-19 12:13:04 +01:00
Benoit Jacob
820545cddb fix unaligned-array-assert link 2011-04-18 06:35:54 -04:00
Jitse Niesen
c9b5531d6c Normalize eigenvectors returned by EigenSolver (fixes bug #249)
because the documentation says that we do this.
Also, add a unit test to cover this.
2011-04-15 17:39:59 +01:00
Jitse Niesen
e654405900 Implement unrolling in copy_using_evaluator() . 2011-04-13 11:49:48 +01:00
Jitse Niesen
7e86324898 Implement evaluator for PartialReduxExpr as a dumb wrapper. 2011-04-13 09:49:10 +01:00
Jitse Niesen
11164830f5 Implement evaluator for Replicate. 2011-04-12 22:54:31 +01:00
Jitse Niesen
12a30a982f Implement evaluator for Select. 2011-04-12 22:34:16 +01:00
Jitse Niesen
88b3116b99 Decouple AssignEvaluator.h from assign_traits from Assign.h 2011-04-12 13:35:08 +01:00
Gael Guennebaud
0c146bee1b enforce no inlining of the GEBP product kernel: this is a big
function that makes no sense to inline, though GCC was thinking
the opposite. This even slightly improves the perf. And as a side
effect this works around a weird GCC-4.4 linking bug (see
"Problem with g++-4.4 -O2 and Eigen3" in the ML)
2011-04-07 18:49:45 +02:00
Jitse Niesen
eae5a6bb09 Decouple Cwise*Op evaluators from expression objects 2011-04-05 18:30:51 +01:00
Jitse Niesen
11ea81858a Implement evaluator for CwiseUnaryView 2011-04-05 18:20:43 +01:00
Jitse Niesen
cca7b146a2 Implement evaluator for Map 2011-04-05 18:15:59 +01:00
Gael Guennebaud
a6b5314c20 Performance tuning for TRMM products 2011-04-05 11:20:50 +02:00
Jitse Niesen
ae06b8af5c Make evaluators for Matrix and Array inherit from common base class.
This gets rid of some code duplication.
2011-04-04 15:35:14 +01:00
Jitse Niesen
afdd26f229 Do some of the actual work in evaluator for Block.
Also, add simple accessor methods to Block expression class.
2011-04-04 13:44:50 +01:00
Gael Guennebaud
0d58c36ffd std::min/max are not implemented and they cannot be implemented easily 2011-04-04 16:26:43 +02:00
Jitse Niesen
70d5837e00 Correct typo in QuickReference doc, plus typographical improvements. 2011-04-01 16:58:51 +01:00
Gael Guennebaud
77a1373c3a fix trmm unit test 2011-03-31 15:32:21 +02:00
Jitse Niesen
d90a8ee8bd Evaluators: add Block evaluator as dumb wrapper, add slice vectorization. 2011-03-31 13:50:52 +01:00
Gael Guennebaud
b471161f28 fix typo and remove unused declaration. 2011-03-31 10:02:02 +02:00
Adam Szalkowski
969e92261d fix bug #239: the essential part was left uninitialized in some cases 2011-03-31 09:54:52 +02:00
Jitse Niesen
10dae8dd4d Add directory containing split_test_helper.h to include path. 2011-03-29 14:17:49 +01:00
Jitse Niesen
8175fe43e0 Evaluators: Make inner vectorization more similar to default traversal. 2011-03-28 21:29:47 +01:00
Gael Guennebaud
00991b5b64 extend trmm/trmv unit test to thoroughly check all configurations 2011-03-28 17:45:16 +02:00
Gael Guennebaud
4f1419e9c3 add the possibility to specify a list of sub-test suffixes in a compact way 2011-03-28 17:43:59 +02:00
Gael Guennebaud
6feb1d3c0b fix trmv for Strictly* triangular matrices and trapezoidal matrices 2011-03-28 17:42:26 +02:00
Gael Guennebaud
568478ffe5 fix trmm for some unusual trapezoidal cases (a dense set of columns or rows is zero) 2011-03-28 17:41:46 +02:00
Gael Guennebaud
f4ac7d2b43 automatically generate the CALL_SUBTEST_* macros 2011-03-28 17:39:05 +02:00
Jitse Niesen
b175bc464f Evaluators: Implement linear traversal, better testing. 2011-03-27 22:08:48 +01:00
Jitse Niesen
1b17a674dd Evaluators: Implement inner vectorization.
The implementation is minimal (I only wrote the functions called by
the unit test) and ugly (lots of copy and pasting).
2011-03-27 13:49:15 +01:00
Jitse Niesen
5c204d1ff7 Evaluators: Implement LinearVectorizedTraversal, packet ops in evaluators. 2011-03-25 16:30:41 +00:00
Gael Guennebaud
e6fa4a267a improve computation of the sub panel width 2011-03-24 23:42:25 +01:00
Gael Guennebaud
931814d7c0 improve performance of trsm 2011-03-24 23:19:53 +01:00
Jitse Niesen
c6ad2deead Bug fix in linspace_op::packetOp(row,col). Fixes bug #232.
Also, add regression test.
2011-03-24 10:42:11 +00:00
Gael Guennebaud
42bc1f77be impl basic product evaluator on top of previous one 2011-03-24 09:33:36 +01:00
Gael Guennebaud
abc8c0821c makes evaluator test use VERIFY_IS_APPROX 2011-03-23 17:23:56 +01:00
Gael Guennebaud
4ada45bc76 BTL: add eigen2 backend 2011-03-23 16:59:12 +01:00
Gael Guennebaud
7d24cf283a do not confuse Eigen3 or beta versions of Eigen3 with Eigen2 2011-03-23 16:58:45 +01:00
Gael Guennebaud
7bb4f6ae2f BTL: do not enable GOTO1 if GOTO2 was found 2011-03-23 16:28:43 +01:00
Gael Guennebaud
3ef0da6efb fix tridiagonalization action 2011-03-23 16:28:09 +01:00
Gael Guennebaud
816541d82c add a stupid Product<A,B> expression produced by prod(a,b), and implement a first version of its evaluator 2011-03-23 16:12:21 +01:00
Gael Guennebaud
cfd5c2d74e import evaluator works 2011-03-23 11:54:00 +01:00
Gael Guennebaud
611fc17894 add support for ublas 2011-03-23 11:39:35 +01:00
Gael Guennebaud
ec32d2c807 BTL: by default use current Eigen headers, and disable the novec version 2011-03-23 11:08:10 +01:00
Gael Guennebaud
b3e43246bc BTL: add a Eigen-blas backend 2011-03-23 11:00:31 +01:00
Gael Guennebaud
f9da1ccc3b BTL: clean the BLAS implementation 2011-03-23 10:35:54 +01:00
Gael Guennebaud
e35b1ef3f3 BTL: rm stupid backends 2011-03-23 10:07:24 +01:00
Gael Guennebaud
fe595e91ae update plot settings 2011-03-23 10:03:01 +01:00
Gael Guennebaud
9cca79f5ca update aat action to do a syrk operation, and remove (comment) ata action 2011-03-23 10:02:00 +01:00
Gael Guennebaud
da3f3586e0 BTL: GMM++ LU is not a full pivoting LU 2011-03-22 15:39:23 +01:00
Gael Guennebaud
22c7609d72 extend sparse product unit tests 2011-03-22 11:58:22 +01:00
Gael Guennebaud
5fda8cdfb3 fix 228 (ei_aligned_stack_delete does not exist anymore) 2011-03-21 21:59:42 +01:00
Benoit Jacob
eb9c6b6cfd merge 2011-03-21 06:46:27 -04:00
Benoit Jacob
bb8a25e94b fix typos 2011-03-21 06:45:57 -04:00
Gael Guennebaud
535a61ede8 port sparse LLT/LDLT to new stack allocation API 2011-03-20 17:10:43 +01:00
Benoit Jacob
eba023d082 make compile_snippet use Eigen/Dense 2011-03-20 11:48:53 -04:00
Gael Guennebaud
b8ecda5c66 clean a bit the stack allocation mechanism 2011-03-19 10:27:47 +01:00
Gael Guennebaud
bbb4b35dfc test the new stack allocation mechanism 2011-03-19 08:51:38 +01:00
Gael Guennebaud
290205dfc0 fix memory leak when a custom scalar throw an exception 2011-03-19 01:06:50 +01:00
Benoit Jacob
5991d247f9 bump 2011-03-18 05:27:58 -04:00
Gael Guennebaud
37c5341d64 fix compilation for old but not so old versions of glew 2011-03-18 10:26:21 +01:00
Gael Guennebaud
2359486129 disable testing of aligned members when aligned static allocation is not enabled (e.g., for gcc 3.4) 2011-03-15 09:53:23 +01:00
Gael Guennebaud
dd2e4be741 fix array_for_matrix unit test 2011-03-15 09:42:22 +01:00
Benoit Jacob
c5ef8f9027 Added tag 3.0-rc1 for changeset 4931a719f4 2011-03-14 14:10:12 -04:00
Benoit Jacob
4931a719f4 bump 2011-03-14 14:10:05 -04:00
Jitse Niesen
27f34269d5 Document EIGEN_DEFAULT_DENSE_INDEX_TYPE.
Also, expand description of EIGEN_DONT_ALIGN.
2011-03-11 11:15:44 +00:00
Jitse Niesen
e7d2376688 Change int to Index in equalsIdentity().
This fixes compilation errors in nullary test on 64-bits machines.
2011-03-11 11:06:13 +00:00
Benoit Jacob
dc36efbb8f fix bug #219: Map Flags AlignedBit was miscomputed, didn't account for EIGEN_ALIGN 2011-03-10 10:17:17 -05:00
Benoit Jacob
9a47fb289b add test for EIGEN_DONT_ALIGN and EIGEN_DONT_ALIGN_STATICALLY, cf recent bugs (214 etc) and changeset 56818d907e 2011-03-10 09:44:59 -05:00
Jitse Niesen
151e3294cf Fix equalsIdentity() for rectangular matrices. 2011-03-10 13:49:06 +00:00
Oliver Ruepp
5d1263e7c5 bug #37: fix resizing when the destination sparse matrix is row major 2011-03-08 16:37:59 +01:00
Gael Guennebaud
c6c6c34909 repeat nullary tests, and fix some tests 2011-03-07 16:41:59 +01:00
Jitse Niesen
931edea57d Tweak geo_quaternion test to squash intermittent failures. 2011-03-07 11:42:55 +00:00
Benoit Jacob
bfcad536e8 * bug #206: correctly forward computationOptions and work towards avoiding mallocs after preallocation, with unit test.
* added EIGEN_RUNTIME_NO_MALLOC and new set_is_malloc_allowed() function to implement that test
2011-03-06 20:59:25 -05:00
Benoit Jacob
b464fc19bc try to fix a ICC 11.1 compiler error (bug #217) 2011-03-06 19:27:31 -05:00
Benoit Jacob
c541d0a62e disable ICC 12 warning 279 - controlling expression is constant 2011-03-06 19:06:44 -05:00
Benoit Jacob
b43d92a5a2 The Eigen2 intrusive std::vector hack really can't be supported in eigen3 (bug #215) 2011-03-04 10:24:41 -05:00
Benoit Jacob
56818d907e Make EIGEN_ALIGN16 always align to fix crashes with EIGEN_DONT_ALIGN_STATICALLY. New macro EIGEN_USER_ALIGN16 had the old behavior i.e. honors user preference. 2011-03-04 09:57:49 -05:00
Sameer Sheorey
e9868f438b Changed debug/gdb/printers.py to correctly display variable sized matrices.
There is no python error now.
2011-03-02 10:47:54 -06:00
Gael Guennebaud
4f0909b5f0 fix bug #212 (installation of Eigen2Support/Geometry) 2011-03-04 14:16:58 +01:00
Jitse Niesen
6cac61ca3e Copy fix of unit test when GSL is enabled to eigen2 test suite. 2011-03-04 11:04:07 +00:00
Jitse Niesen
1180ede36d Escape hash character in docs as required by doxygen. 2011-03-03 15:19:11 +00:00
Jitse Niesen
99fa279ed1 Use copy_bool() workaround in Eigen2 test suite.
See bug #89 and changeset 59596efdf7
.
2011-03-03 14:17:23 +00:00
Jitse Niesen
dbab12d6b0 Fix bug #205: eigen2_adjoint_5 test fails. 2011-03-02 22:00:48 +00:00
Gael Guennebaud
dc727d86f1 extend unit tests of Transform * MatrixBase and Transform * Homogeneous 2011-03-02 19:34:39 +01:00
Gael Guennebaud
5cec29162b fix compilation in the case of 1D Transform 2011-03-02 19:29:55 +01:00
Gael Guennebaud
703c8a0cc6 fix compilation when mixing CompactAffine with Homogeneous objects 2011-03-02 19:27:13 +01:00
Gael Guennebaud
d30f0c0953 fix transform * matrix products: in particular it now truly considers the rhs as a set of (homogeneous) points and does not neglect the homogeneous coordinates in the case of affine transform 2011-03-02 19:26:38 +01:00
Gael Guennebaud
adacacb285 fix bug #204: limit integer values to numbers which are representable using float 2011-03-02 14:24:26 +01:00
Gael Guennebaud
c8e1b679fa re-enable fast pset1-pstore by introducing a new higher level pstore1 function 2011-03-02 10:55:44 +01:00
Gael Guennebaud
951e238430 now fixing "unsupported" "legacy" code... 2011-03-01 16:45:46 +01:00
Benoit Jacob
9c5c8d8916 Added tag 3.0-beta4 for changeset 77fc6a9914 2011-02-28 00:55:59 -05:00
Benoit Jacob
77fc6a9914 bump 2011-02-28 00:55:52 -05:00
Benoit Jacob
eef03525b8 fix bug #203: revert to using _mm_set1_p[sd] 2011-02-28 00:04:05 -05:00
Benoit Jacob
31621ff0ef relax condition in matrix_exponential test for clang 2011-02-27 23:25:14 -05:00
Benoit Jacob
0b44893b4e fix umeyama test 2011-02-27 23:20:45 -05:00
Benoit Jacob
8cad73072e fix stable_norm test: the |small| value was 0 on clang with complex<float>. 2011-02-27 22:35:49 -05:00
Benoit Jacob
9be2712bf7 remove now-useless comments 2011-02-27 22:35:17 -05:00
Benoit Jacob
0612768c1c fix bug #201: Clang too has intrinsics bugs preventing us from using custom unaligned loads 2011-02-27 21:59:07 -05:00
Benoit Jacob
32025a2510 disable BVH test on Clang++. Looks like there's a good reason why BVH is unsupported. It seems to have a very weird usage pattern, relying on an externally defined bounding_box function in a naive way. 2011-02-27 21:37:34 -05:00
Benoit Jacob
771e64200f fix compilation of unit tests with clang 2011-02-27 20:33:58 -05:00
Benoit Jacob
4846c76d9d shut up a stupid clang 2.8 warning 2011-02-27 20:18:03 -05:00
Benoit Jacob
afc9efca15 fix compilation with clang 2.8 2011-02-27 20:17:47 -05:00
Benoit Jacob
ea7d872181 documentation fixes 2011-02-27 17:43:10 -05:00
Benoit Jacob
b6299c974f add option to build in 32bit mode 2011-02-27 17:27:23 -05:00
Benoit Jacob
b3544ce2ae bug #195 - fix this once and for all: just never use _mm_load_sd on gcc/i386, it generates redundant x87 ops 2011-02-27 17:26:59 -05:00
Jitse Niesen
a8f5ef9388 Document (non)sorting of eigenvalues.
Also, update docs for (Generalized)SelfAdjointEigenSolver to reflect that these
two classes were split apart.
2011-02-27 14:06:55 +00:00
Jitse Niesen
58abf0eb98 Use absolute error to test sum in which cancellation may occur. 2011-02-25 08:56:37 +00:00
Gael Guennebaud
ef73265987 to ease debugging let's catch invalid template options in Transform 2011-02-25 09:03:24 +01:00
Gael Guennebaud
4fbd78d993 fix compilation with gcc 3.4 2011-02-25 09:02:15 +01:00
Benoit Jacob
5dfae4524b fix bug #195: fast unaligned load for integer using _mm_load_sd failed when the value interpreted as a NaN 2011-02-24 10:31:57 -05:00
Hauke Heibel
2064c59878 Improved docs of PlainObjectBase::conservativeResize methods. 2011-02-24 15:48:41 +01:00
Gael Guennebaud
bb9a465c5a fix AltiVec ploaddup 2011-02-24 00:23:50 +03:00
Gael Guennebaud
28d17c5390 bounds the range of random integers for AltiVec 2011-02-24 00:22:53 +03:00
Gael Guennebaud
4bfe38eda2 extend testing of ploaddup 2011-02-24 00:22:10 +03:00
Gael Guennebaud
23aae0d63e fix pset1 for complex 2011-02-23 21:24:47 +03:00
Gael Guennebaud
0dfea7fce4 improve packetmath unit test 2011-02-23 21:24:26 +03:00
Gael Guennebaud
c121e6f390 implement ploaddup for complex and SSE/NEON even though they are not used in practice 2011-02-23 16:31:42 +01:00
Gael Guennebaud
955c099eb5 implement ploaddup for altivec and add respective unit test 2011-02-23 18:20:55 +03:00
Gael Guennebaud
a00aaf7f7e fix overflow in packetmath unit test 2011-02-23 17:57:18 +03:00
Gael Guennebaud
6e01780541 fix a couple of issues with pcplxflip 2011-02-23 17:51:40 +03:00
Gael Guennebaud
939f0327b6 mention reverse and replicate in the quick ref 2011-02-23 15:31:16 +01:00
Gael Guennebaud
78e1a62c54 implement pcplxflip for altivec 2011-02-23 14:20:58 +01:00
Gael Guennebaud
59eeb67187 add unit test for pcplxflip 2011-02-23 14:20:33 +01:00
Gael Guennebaud
b8374aec00 implement workarounds for MSVC IDEs and the Experimental target 2011-02-23 11:53:20 +01:00
Gael Guennebaud
7dc18b20bb same for neon 2011-02-23 09:41:55 +01:00
Gael Guennebaud
32e7dae776 Altivec: fix infinite loop (ei_ -> internal:: change) 2011-02-23 09:41:02 +01:00
Gael Guennebaud
9ab503903e suppress unused warning 2011-02-23 09:32:55 +01:00
Gael Guennebaud
14b164b00e do not try to use Eigen's blas/lapack if they cannot be compiled 2011-02-23 09:25:32 +01:00
Gael Guennebaud
c78b5fd9aa fix no newline warning 2011-02-23 09:23:11 +01:00
Gael Guennebaud
2fb5567e08 add missing AlignedOnScalar 2011-02-22 21:25:47 +01:00
Benoit Jacob
3df134dec2 fix icc warning #68 2011-02-22 10:11:03 -05:00
Benoit Jacob
c58a2ff03a add EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS non-default option. Use it in our own CMakeLists. also add a include-guard-like mechanism to prevent doing unmatched #pragma warning push/pop. 2011-02-22 10:05:41 -05:00
Benoit Jacob
9e1127619c merge 2011-02-22 09:33:01 -05:00
Benoit Jacob
720767ae40 ICC 12 / linux only defined __INTEL_COMPILER, not __intel_compiler 2011-02-22 09:32:39 -05:00
Benoit Jacob
d8e97aee89 shut up stupid ICC warnings 2011-02-22 09:31:22 -05:00
Benoit Jacob
625814464e fix legitimate ICC 12 warning 2011-02-22 09:30:54 -05:00
Gael Guennebaud
39b27fb656 altivec compilation fix 2011-02-22 15:26:28 +01:00
Benoit Jacob
25579df2d4 'fix' a couple of clang -Wconstant-logical-operand warnings (still not convinced about the pertinence of that warning) 2011-02-22 08:54:55 -05:00
Benoit Jacob
3884308da7 __attribute__((flatten)) seems to be recognized by neither clang nor icc despite these compilers defining __GNUC__. 2011-02-22 08:40:37 -05:00
Gael Guennebaud
68631e28d4 also test non_projective_only with row major transformations 2011-02-22 14:26:32 +01:00
Benoit Jacob
39d3bc2394 fix bug #190: directly pass Transform Options to Matrix, allowing to use RowMajor. Fix issues in Transform with non-default Options. 2011-02-22 08:14:38 -05:00
Gael Guennebaud
659c97ee49 gcc 4.4 also defines float32_t as a special type 2011-02-22 10:04:09 +01:00
Gael Guennebaud
769eeac35e disable output compression since this feature seems to be broken 2011-02-21 21:19:38 +01:00
Gael Guennebaud
51da67f211 more compilation fixes for altivec 2011-02-21 20:36:20 +01:00
Gael Guennebaud
05545d0197 fix compilation 2011-02-21 17:47:31 +01:00
Gael Guennebaud
8bee573a78 workaround ICC aggressive optimization 2011-02-21 16:17:58 +01:00
Gael Guennebaud
fb1a29fed5 fix ICE and warning with gcc 4.2.4 2011-02-21 16:11:18 +01:00
Gael Guennebaud
e129e985c3 link to blas/lapack only when needed, and use the static versions to hopefully workaround weird linking issues to gfortranbegin (see jitse dashboard) 2011-02-21 15:48:37 +01:00
Gael Guennebaud
2d5ea82807 fix bug #176 (workaround a too aggressive optimization made by ICC) 2011-02-21 11:00:07 +01:00
Hauke Heibel
50a3cd678a Improved site and buildname generation. 2011-02-20 11:54:07 +01:00
Gael Guennebaud
3c00e3da03 enable some tests that have been commented out 2011-02-18 18:08:58 +01:00
Gael Guennebaud
434817164e fix umfpack with complexes 2011-02-18 18:07:59 +01:00
Gael Guennebaud
2c1ac23c62 remove unused code 2011-02-18 17:54:48 +01:00
Gael Guennebaud
a0e5b00280 forgot that one, again 2011-02-18 17:50:36 +01:00
Gael Guennebaud
6456b74a89 merge 2011-02-18 17:40:31 +01:00
Gael Guennebaud
86ca05b324 remove largeEps in adjoint unit test and use a more accurate test_isApproxWithRef test. 2011-02-18 17:39:04 +01:00
Gael Guennebaud
8f8c67b8bd fix bug #186 (in 32 bits mode, gcc 4.3 messed up with pfirst for complex<float>) 2011-02-18 15:47:17 +01:00
Benoit Jacob
aa966ca319 fix bug #187: stable norm test was quite broken 2011-02-18 09:46:49 -05:00
Gael Guennebaud
f7cd63b964 fix bug #189 (issue with fortran conventions to return COMPLEX values) 2011-02-18 15:11:31 +01:00
Gael Guennebaud
69cecc45e5 extend mapstride unit test to test unaligned configurations 2011-02-18 14:41:40 +01:00
Gael Guennebaud
abce49ea21 fix a segfault in "slice vectorization" when the destination might not be aligned on a scalar (complex<double>) 2011-02-18 14:20:36 +01:00
Gael Guennebaud
d271ad38ce back to brute force linking to sparse libraries (fix cmake when these libs are not found) 2011-02-18 11:35:45 +01:00
Gael Guennebaud
3e2314dd67 forgot to include this file in previous commit (needed for lapack) 2011-02-18 11:32:39 +01:00
Gael Guennebaud
444c1bc55b now cholmod, umfpack, and superlu uses our own BLAS and LAPACK libs 2011-02-18 11:26:31 +01:00
Gael Guennebaud
390724b4b6 add lapack interface to real symmetric eigenvalue dec and enable building of the lapack shared library 2011-02-18 11:25:04 +01:00
Gael Guennebaud
d8ca948148 it is now up to user of these Find* module to find and link to BLAS and/or LAPACK 2011-02-18 11:23:27 +01:00
Gael Guennebaud
3345ea0ddd clean a bit SuperLU declarations 2011-02-18 10:23:32 +01:00
Gael Guennebaud
9195a224f3 fix division by zero if the matrix is exactly zero 2011-02-17 19:39:57 +01:00
Gael Guennebaud
b8ef48c46d for consistency forward declare tan, asin, acos functors 2011-02-17 18:23:04 +01:00
Gael Guennebaud
a53a7d6e6a use C linkage for umfpack (might fix some linking issues) 2011-02-17 18:19:28 +01:00
Gael Guennebaud
eda59ffc1b mention std::ptr_fun in the quickref guide 2011-02-17 18:07:21 +01:00
Gael Guennebaud
6f86c12339 typo 2011-02-17 17:48:16 +01:00
Gael Guennebaud
aea630a98a factorize implementation of standard real unary math functions, and add acos, asin 2011-02-17 17:37:11 +01:00
Gael Guennebaud
2ba55e90db make check no test everything - also rm the EigenTesting cmake sub-project 2011-02-17 16:58:18 +01:00
Benoit Jacob
d0b8ce8f2a fix unused var warning 2011-02-17 09:41:17 -05:00
Gael Guennebaud
1c4e85ac7e forgot to include this file in one pretty old commit (missing EXCLUDE_FROM_ALL) 2011-02-17 15:33:35 +01:00
Jitse Niesen
78fa34e8ff Add blas tests for buildtests target. 2011-02-17 13:53:20 +00:00
Benoit Jacob
8fb27fad36 remove #include <iostream> at the wrong place 2011-02-17 07:47:05 -05:00
Jitse Niesen
be224d93f4 Include necessary header files when working around bug #89.
Fixes bug #188.
2011-02-17 11:51:48 +00:00
Benoit Jacob
11402edfd3 with old gcc (bug #89), only include iostream in debug mode 2011-02-16 12:01:47 -05:00
Gael Guennebaud
fe8a710a21 properly report OpenGL as a disabled backend 2011-02-16 18:01:06 +01:00
Gael Guennebaud
03d86ea736 fix installation of unsupported modules 2011-02-16 17:59:35 +01:00
Benoit Jacob
13a5582835 undo debugging change 2011-02-16 09:18:48 -05:00
Benoit Jacob
59596efdf7 Fix bug #89: on GCC <= 4.3, use a custom assert implementation to work around a compiler bug 2011-02-16 08:50:19 -05:00
Jitse Niesen
6db8fa7d04 Replace unset() by set() with no value specified; this does the same.
unset() was introduced in CMake 2.6.3 but we require only 2.6.2.
2011-02-16 10:16:47 +00:00
Gael Guennebaud
2f15f74218 CTEST_CUSTOM_* parameters have to be put in a CTestCustom.cmake file which itself has to be in the build directory 2011-02-15 12:39:45 +01:00
Gael Guennebaud
578d6f7ced now ctest does compile the test even though they are not in the "all" target 2011-02-15 11:40:43 +01:00
Gael Guennebaud
a1d7e9051e fix bug #184 (warning) 2011-02-14 15:41:00 +01:00
Gael Guennebaud
8e0a42350d fix stupid warning (bug #185) 2011-02-14 15:33:26 +01:00
Hauke Heibel
ac465a0891 Improve the Transform interface in order to prevent T.rotation() = R from compiling. 2011-02-14 12:00:47 +01:00
Jitse Niesen
211e1f8044 Improve documentation of plugins. 2011-02-13 22:50:57 +00:00
Benoit Jacob
d09b94e2ad Added tag 3.0-beta3 for changeset 58986ac832 2011-02-12 18:57:10 -05:00
512 changed files with 33147 additions and 17762 deletions

11
.hgeol
View File

@@ -1,3 +1,8 @@
[patterns]
**.* = native
eigen_autoexp_part.dat = CRLF
[patterns]
scripts/*.in = LF
debug/msvc/*.dat = CRLF
unsupported/test/mpreal/*.* = CRLF
** = native
[repository]
native = LF

View File

@@ -64,6 +64,10 @@ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
find_package(StandardMathLibrary)
set(EIGEN_TEST_CUSTOM_LINKER_FLAGS "" CACHE STRING "Additional linker flags when linking unit tests.")
set(EIGEN_TEST_CUSTOM_CXX_FLAGS "" CACHE STRING "Additional compiler flags when compiling unit tests.")
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "")
if(NOT STANDARD_MATH_LIBRARY_FOUND)
@@ -101,6 +105,10 @@ if(EIGEN_DEFAULT_TO_ROW_MAJOR)
add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR")
endif()
add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
@@ -205,6 +213,7 @@ endif(MSVC)
option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF)
option(EIGEN_TEST_32BIT "Force generating 32bit code." OFF)
if(EIGEN_TEST_X87)
set(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION ON)
@@ -216,6 +225,15 @@ if(EIGEN_TEST_X87)
endif()
endif()
if(EIGEN_TEST_32BIT)
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
message(STATUS "Forcing generation of 32-bit code in tests/examples")
else()
message(STATUS "EIGEN_TEST_32BIT ignored on your compiler")
endif()
endif()
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
add_definitions(-DEIGEN_DONT_VECTORIZE=1)
message(STATUS "Disabling vectorization in tests/examples")
@@ -267,9 +285,21 @@ install(FILES
)
if(EIGEN_BUILD_PKGCONFIG)
SET(path_separator ":")
STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}")
message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib")
FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search})
if(pkg_config_libdir)
SET(pkg_config_install_dir ${pkg_config_libdir})
message(STATUS "found ${pkg_config_libdir}/pkgconfig" )
else(pkg_config_libdir)
SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share)
message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" )
endif(pkg_config_libdir)
configure_file(eigen3.pc.in eigen3.pc)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
DESTINATION share/pkgconfig
DESTINATION ${pkg_config_install_dir}/pkgconfig
)
endif(EIGEN_BUILD_PKGCONFIG)
@@ -277,10 +307,10 @@ add_subdirectory(Eigen)
add_subdirectory(doc EXCLUDE_FROM_ALL)
include(CTest)
include(EigenConfigureTesting)
# fixme, not sure this line is still needed:
enable_testing() # must be called from the root CMakeLists, see man page
include(EigenTesting)
ei_init_testing()
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
@@ -288,20 +318,18 @@ else()
add_subdirectory(test EXCLUDE_FROM_ALL)
endif()
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(blas)
add_subdirectory(lapack)
else()
add_subdirectory(blas EXCLUDE_FROM_ALL)
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
add_subdirectory(unsupported)
add_subdirectory(demos EXCLUDE_FROM_ALL)
if(NOT MSVC)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(blas)
add_subdirectory(lapack)
else()
add_subdirectory(blas EXCLUDE_FROM_ALL)
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
endif(NOT MSVC)
# must be after test and unsupported, for configuring buildtests.in
add_subdirectory(scripts EXCLUDE_FROM_ALL)

26
COPYING.BSD Normal file
View File

@@ -0,0 +1,26 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

View File

@@ -11,7 +11,3 @@ set(CTEST_DROP_METHOD "http")
set(CTEST_DROP_SITE "eigen.tuxfamily.org")
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
set(CTEST_DROP_SITE_CDASH TRUE)
## A tribute to Dynamic!
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "33331")
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "33331")

4
CTestCustom.cmake.in Normal file
View File

@@ -0,0 +1,4 @@
## A tribute to Dynamic!
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "33331")
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "33331")

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -24,10 +24,13 @@ namespace Eigen {
#include "src/misc/Solve.h"
#include "src/Cholesky/LLT.h"
#include "src/Cholesky/LDLT.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/Cholesky/LLT_MKL.h"
#endif
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_CHOLESKY_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

34
Eigen/CholmodSupport Normal file
View File

@@ -0,0 +1,34 @@
#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
#define EIGEN_CHOLMODSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
extern "C" {
#include <cholmod.h>
}
namespace Eigen {
/** \ingroup Support_modules
* \defgroup CholmodSupport_Module CholmodSupport module
*
*
* \code
* #include <Eigen/CholmodSupport>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/CholmodSupport/CholmodSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_CHOLMODSUPPORT_MODULE_H

View File

@@ -26,14 +26,18 @@
#ifndef EIGEN_CORE_H
#define EIGEN_CORE_H
// first thing Eigen does: prevent MSVC from committing suicide
#include "src/Core/util/DisableMSVCWarnings.h"
// first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableStupidWarnings.h"
// then include this file where all our macros are defined. It's really important to do it first because
// it's where we do all the alignment settings (platform detection and honoring the user's will if he
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
#include "src/Core/util/Macros.h"
// this include file manages BLAS and MKL related macros
// and inclusion of their respective header files
#include "src/Core/util/MKL_support.h"
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
#if !EIGEN_ALIGN
@@ -51,16 +55,16 @@
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
#endif
#endif
#endif
// Remember that usage of defined() in a #define is undefined by the standard
#if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_BUT_NOT_OLD_GCC
#else
// Remember that usage of defined() in a #define is undefined by the standard
#if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
#endif
#endif
#ifndef EIGEN_DONT_VECTORIZE
#if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
#if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
// Defines symbols for compile-time detection of which instructions are
// used.
@@ -143,6 +147,7 @@
#ifdef EIGEN_HAS_ERRNO
#include <cerrno>
#endif
#include <cstddef>
#include <cstdlib>
#include <cmath>
#include <complex>
@@ -158,7 +163,7 @@
// for outputting debug info
#ifdef EIGEN_DEBUG_ASSIGN
#include<iostream>
#include <iostream>
#endif
// required for __cpuid, needs to be included after cmath
@@ -166,7 +171,7 @@
#include <intrin.h>
#endif
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
#define EIGEN_EXCEPTIONS
#endif
@@ -174,16 +179,7 @@
#include <new>
#endif
// this needs to be done after all possible windows C header includes and before any Eigen source includes
// (system C++ includes are supposed to be able to deal with this already):
// windows.h defines min and max macros which would make Eigen fail to compile.
#if defined(min) || defined(max)
#error The preprocessor symbols 'min' or 'max' are defined. If you are compiling on Windows, do #define NOMINMAX to prevent windows.h from defining these symbols.
#endif
// defined in bits/termios.h
#undef B0
/** \brief Namespace containing all symbols from the %Eigen library. */
namespace Eigen {
inline static const char *SimdInstructionSetsInUse(void) {
@@ -239,6 +235,8 @@ inline static const char *SimdInstructionSetsInUse(void) {
// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
// ensure QNX/QCC support
using std::size_t;
// gcc 4.6.0 wants std:: for ptrdiff_t
using std::ptrdiff_t;
/** \defgroup Core_Module Core module
* This is the main module of Eigen providing dense matrix and vector support
@@ -250,6 +248,10 @@ using std::size_t;
* \endcode
*/
/** \defgroup Support_modules Support modules [category]
* Category of modules which add support for external libraries.
*/
#include "src/Core/util/Constants.h"
#include "src/Core/util/ForwardDeclarations.h"
#include "src/Core/util/Meta.h"
@@ -321,7 +323,7 @@ using std::size_t;
#include "src/Core/CommaInitializer.h"
#include "src/Core/Flagged.h"
#include "src/Core/ProductBase.h"
#include "src/Core/Product.h"
#include "src/Core/GeneralProduct.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/SolveTriangular.h"
@@ -350,11 +352,26 @@ using std::size_t;
#include "src/Core/ArrayBase.h"
#include "src/Core/ArrayWrapper.h"
#ifdef EIGEN_USE_BLAS
#include "src/Core/products/GeneralMatrixMatrix_MKL.h"
#include "src/Core/products/GeneralMatrixVector_MKL.h"
#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h"
#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h"
#include "src/Core/products/SelfadjointMatrixVector_MKL.h"
#include "src/Core/products/TriangularMatrixMatrix_MKL.h"
#include "src/Core/products/TriangularMatrixVector_MKL.h"
#include "src/Core/products/TriangularSolverMatrix_MKL.h"
#endif // EIGEN_USE_BLAS
#ifdef EIGEN_USE_MKL_VML
#include "src/Core/Assign_MKL.h"
#endif
} // namespace Eigen
#include "src/Core/GlobalFunctions.h"
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#ifdef EIGEN2_SUPPORT
#include "Eigen2Support"

View File

@@ -29,11 +29,12 @@
#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
#endif
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
/** \defgroup Eigen2Support_Module Eigen2 support module
/** \ingroup Support_modules
* \defgroup Eigen2Support_Module Eigen2 support module
* This module provides a couple of deprecated functions improving the compatibility with Eigen2.
*
* To use it, define EIGEN2_SUPPORT before including any Eigen header
@@ -58,11 +59,29 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
// Eigen2 used to include iostream
#include<iostream>
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
using Eigen::Vector##SizeSuffix##TypeSuffix; \
using Eigen::RowVector##SizeSuffix##TypeSuffix;
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
#define EIGEN_USING_MATRIX_TYPEDEFS \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
#define USING_PART_OF_NAMESPACE_EIGEN \
EIGEN_USING_MATRIX_TYPEDEFS \
using Eigen::Matrix; \

View File

@@ -3,12 +3,13 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
#include "Cholesky"
#include "Jacobi"
#include "Householder"
#include "LU"
#include "Geometry"
namespace Eigen {
@@ -35,10 +36,15 @@ namespace Eigen {
#include "src/Eigenvalues/ComplexSchur.h"
#include "src/Eigenvalues/ComplexEigenSolver.h"
#include "src/Eigenvalues/MatrixBaseEigenvalues.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/Eigenvalues/RealSchur_MKL.h"
#include "src/Eigenvalues/ComplexSchur_MKL.h"
#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h"
#endif
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_EIGENVALUES_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
#include "SVD"
#include "LU"
@@ -60,7 +60,7 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_GEOMETRY_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -21,7 +21,7 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_HOUSEHOLDER_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -0,0 +1,37 @@
#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module
*
* This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a square matrix, usually very large and sparse.
* Those solvers are accessible via the following classes:
* - ConjugateGradient for selfadjoint (hermitian) matrices,
* - BiCGSTAB for general square matrices.
*
* Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
*
* \code
* #include <Eigen/IterativeLinearSolvers>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/IterativeLinearSolvers/IterativeSolverBase.h"
#include "src/IterativeLinearSolvers/BasicPreconditioners.h"
#include "src/IterativeLinearSolvers/ConjugateGradient.h"
#include "src/IterativeLinearSolvers/BiCGSTAB.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -23,7 +23,7 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_JACOBI_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -23,6 +23,9 @@ namespace Eigen {
#include "src/misc/Image.h"
#include "src/LU/FullPivLU.h"
#include "src/LU/PartialPivLU.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/LU/PartialPivLU_MKL.h"
#endif
#include "src/LU/Determinant.h"
#include "src/LU/Inverse.h"
@@ -36,7 +39,7 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_LU_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -5,9 +5,12 @@
#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT)
#endif
// exclude from normal eigen3-only documentation
#ifdef EIGEN2_SUPPORT
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
#include "Eigenvalues"
#include "Geometry"
@@ -26,6 +29,8 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN2_SUPPORT
#endif // EIGEN_REGRESSION_MODULE_H

27
Eigen/OrderingMethods Normal file
View File

@@ -0,0 +1,27 @@
#ifndef EIGEN_ORDERINGMETHODS_MODULE_H
#define EIGEN_ORDERINGMETHODS_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup OrderingMethods_Module OrderingMethods module
*
* This module is currently for internal use only.
*
*
* \code
* #include <Eigen/OrderingMethods>
* \endcode
*/
#include "src/OrderingMethods/Amd.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_ORDERINGMETHODS_MODULE_H

30
Eigen/PARDISOSupport Normal file
View File

@@ -0,0 +1,30 @@
#ifndef EIGEN_PARDISOSUPPORT_MODULE_H
#define EIGEN_PARDISOSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
#include <mkl_pardiso.h>
#include <unsupported/Eigen/SparseExtra>
namespace Eigen {
/** \ingroup Support_modules
* \defgroup PARDISOSupport_Module PARDISOSupport module
*
* This module brings support for the Intel(R) MKL PARDISO direct sparse solvers
*
* \code
* #include <Eigen/PARDISOSupport>
* \endcode
*/
#include "src/PARDISOSupport/PARDISOSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_PARDISOSUPPORT_MODULE_H

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
#include "Cholesky"
#include "Jacobi"
@@ -28,6 +28,10 @@ namespace Eigen {
#include "src/QR/HouseholderQR.h"
#include "src/QR/FullPivHouseholderQR.h"
#include "src/QR/ColPivHouseholderQR.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/QR/HouseholderQR_MKL.h"
#include "src/QR/ColPivHouseholderQR_MKL.h"
#endif
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/QR.h"
@@ -35,7 +39,7 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#ifdef EIGEN2_SUPPORT
#include "Eigenvalues"

View File

@@ -6,7 +6,7 @@
#if (!EIGEN_MALLOC_ALREADY_ALIGNED)
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
void *qMalloc(size_t size)
{
@@ -26,7 +26,7 @@ void *qRealloc(void *ptr, size_t size)
return newPtr;
}
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif

View File

@@ -5,7 +5,7 @@
#include "Householder"
#include "Jacobi"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -13,9 +13,9 @@ namespace Eigen {
*
*
*
* This module provides SVD decomposition for (currently) real matrices.
* This module provides SVD decomposition for matrices (both real and complex).
* This decomposition is accessible via the following MatrixBase method:
* - MatrixBase::svd()
* - MatrixBase::jacobiSvd()
*
* \code
* #include <Eigen/SVD>
@@ -24,6 +24,9 @@ namespace Eigen {
#include "src/misc/Solve.h"
#include "src/SVD/JacobiSVD.h"
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
#include "src/SVD/JacobiSVD_MKL.h"
#endif
#include "src/SVD/UpperBidiagonalization.h"
#ifdef EIGEN2_SUPPORT
@@ -32,7 +35,7 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SVD_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -1,69 +1,27 @@
#ifndef EIGEN_SPARSE_MODULE_H
#define EIGEN_SPARSE_MODULE_H
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include <vector>
#include <map>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#ifdef EIGEN2_SUPPORT
#define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
#endif
#ifndef EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
#error The sparse module API is not stable yet. To use it anyway, please define the EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET preprocessor token.
#endif
namespace Eigen {
/** \defgroup Sparse_Module Sparse module
/** \defgroup Sparse_modules Sparse modules
*
*
*
* See the \ref TutorialSparse "Sparse tutorial"
* Meta-module including all related modules:
* - SparseCore
* - OrderingMethods
* - SparseCholesky
* - IterativeLinearSolvers
*
* \code
* #include <Eigen/Sparse>
* \endcode
*/
/** The type used to identify a general sparse storage. */
struct Sparse {};
#include "src/Sparse/SparseUtil.h"
#include "src/Sparse/SparseMatrixBase.h"
#include "src/Sparse/CompressedStorage.h"
#include "src/Sparse/AmbiVector.h"
#include "src/Sparse/SparseMatrix.h"
#include "src/Sparse/DynamicSparseMatrix.h"
#include "src/Sparse/MappedSparseMatrix.h"
#include "src/Sparse/SparseVector.h"
#include "src/Sparse/CoreIterators.h"
#include "src/Sparse/SparseBlock.h"
#include "src/Sparse/SparseTranspose.h"
#include "src/Sparse/SparseCwiseUnaryOp.h"
#include "src/Sparse/SparseCwiseBinaryOp.h"
#include "src/Sparse/SparseDot.h"
#include "src/Sparse/SparseAssign.h"
#include "src/Sparse/SparseRedux.h"
#include "src/Sparse/SparseFuzzy.h"
#include "src/Sparse/SparseProduct.h"
#include "src/Sparse/SparseSparseProduct.h"
#include "src/Sparse/SparseDenseProduct.h"
#include "src/Sparse/SparseDiagonalProduct.h"
#include "src/Sparse/SparseTriangularView.h"
#include "src/Sparse/SparseSelfAdjointView.h"
#include "src/Sparse/TriangularSolver.h"
#include "src/Sparse/SparseView.h"
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "SparseCore"
#include "OrderingMethods"
#include "SparseCholesky"
#include "IterativeLinearSolvers"
#endif // EIGEN_SPARSE_MODULE_H

34
Eigen/SparseCholesky Normal file
View File

@@ -0,0 +1,34 @@
#ifndef EIGEN_SPARSECHOLESKY_MODULE_H
#define EIGEN_SPARSECHOLESKY_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup SparseCholesky_Module SparseCholesky module
*
* This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices.
* Those decompositions are accessible via the following classes:
* - SimplicialLLt,
* - SimplicialLDLt
*
* Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module.
*
* \code
* #include <Eigen/SparseCholesky>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/SparseCholesky/SimplicialCholesky.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSECHOLESKY_MODULE_H

65
Eigen/SparseCore Normal file
View File

@@ -0,0 +1,65 @@
#ifndef EIGEN_SPARSECORE_MODULE_H
#define EIGEN_SPARSECORE_MODULE_H
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include <vector>
#include <map>
#include <cstdlib>
#include <cstring>
#include <algorithm>
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup SparseCore_Module SparseCore module
*
* This module provides a sparse matrix representation, and basic associated matrix manipulations
* and operations.
*
* See the \ref TutorialSparse "Sparse tutorial"
*
* \code
* #include <Eigen/SparseCore>
* \endcode
*
* This module depends on: Core.
*/
/** The type used to identify a general sparse storage. */
struct Sparse {};
#include "src/SparseCore/SparseUtil.h"
#include "src/SparseCore/SparseMatrixBase.h"
#include "src/SparseCore/CompressedStorage.h"
#include "src/SparseCore/AmbiVector.h"
#include "src/SparseCore/SparseMatrix.h"
#include "src/SparseCore/MappedSparseMatrix.h"
#include "src/SparseCore/SparseVector.h"
#include "src/SparseCore/CoreIterators.h"
#include "src/SparseCore/SparseBlock.h"
#include "src/SparseCore/SparseTranspose.h"
#include "src/SparseCore/SparseCwiseUnaryOp.h"
#include "src/SparseCore/SparseCwiseBinaryOp.h"
#include "src/SparseCore/SparseDot.h"
#include "src/SparseCore/SparseAssign.h"
#include "src/SparseCore/SparseRedux.h"
#include "src/SparseCore/SparseFuzzy.h"
#include "src/SparseCore/ConservativeSparseSparseProduct.h"
#include "src/SparseCore/SparseSparseProductWithPruning.h"
#include "src/SparseCore/SparseProduct.h"
#include "src/SparseCore/SparseDenseProduct.h"
#include "src/SparseCore/SparseDiagonalProduct.h"
#include "src/SparseCore/SparseTriangularView.h"
#include "src/SparseCore/SparseSelfAdjointView.h"
#include "src/SparseCore/TriangularSolver.h"
#include "src/SparseCore/SparseView.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSECORE_MODULE_H

53
Eigen/SuperLUSupport Normal file
View File

@@ -0,0 +1,53 @@
#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H
#define EIGEN_SUPERLUSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
#ifdef EMPTY
#define EIGEN_EMPTY_WAS_ALREADY_DEFINED
#endif
typedef int int_t;
#include <slu_Cnames.h>
#include <supermatrix.h>
#include <slu_util.h>
// slu_util.h defines a preprocessor token named EMPTY which is really polluting,
// so we remove it in favor of a SUPERLU_EMPTY token.
// If EMPTY was already defined, then we don't undef it.
#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED)
# undef EIGEN_EMPTY_WAS_ALREADY_DEFINED
#elif defined(EMPTY)
# undef EMPTY
#endif
#define SUPERLU_EMPTY (-1)
namespace Eigen { struct SluMatrix; }
namespace Eigen {
/** \ingroup Support_modules
* \defgroup SuperLUSupport_Module SuperLUSupport module
*
* \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
*
* \code
* #include <Eigen/SuperLUSupport>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/SuperLUSupport/SuperLUSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SUPERLUSUPPORT_MODULE_H

34
Eigen/UmfPackSupport Normal file
View File

@@ -0,0 +1,34 @@
#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H
#define EIGEN_UMFPACKSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
extern "C" {
#include <umfpack.h>
}
namespace Eigen {
/** \ingroup Support_modules
* \defgroup UmfPackSupport_Module UmfPackSupport module
*
*
*
*
* \code
* #include <Eigen/UmfPackSupport>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/UmfPackSupport/UmfPackSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_UMFPACKSUPPORT_MODULE_H

View File

@@ -1,9 +1,10 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009 Keir Mierle <mierle@gmail.com>
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011 Timothy E. Holy <tim.holy@gmail.com >
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -31,13 +32,15 @@ namespace internal {
template<typename MatrixType, int UpLo> struct LDLT_Traits;
}
/** \ingroup cholesky_Module
/** \ingroup Cholesky_Module
*
* \class LDLT
*
* \brief Robust Cholesky decomposition of a matrix with pivoting
*
* \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
* \param UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
* The other triangular part won't be read.
*
* Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
* matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L
@@ -48,14 +51,10 @@ template<typename MatrixType, int UpLo> struct LDLT_Traits;
* on D also stabilizes the computation.
*
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
* decomposition to determine whether a system of equations has a solution.
* decomposition to determine whether a system of equations has a solution.
*
* \sa MatrixBase::ldlt(), class LLT
*/
/* THIS PART OF THE DOX IS CURRENTLY DISABLED BECAUSE INACCURATE BECAUSE OF BUG IN THE DECOMPOSITION CODE
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
* the strict lower part does not have to store correct values.
*/
template<typename _MatrixType, int _UpLo> class LDLT
{
public:
@@ -98,6 +97,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
m_isInitialized(false)
{}
/** \brief Constructor with decomposition
*
* This calculates the decomposition for the input \a matrix.
* \sa LDLT(Index size)
*/
LDLT(const MatrixType& matrix)
: m_matrix(matrix.rows(), matrix.cols()),
m_transpositions(matrix.rows()),
@@ -107,6 +111,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
compute(matrix);
}
/** Clear any existing decomposition
* by marking this object as not initialized. Only the initialization flag
* is reset here; the stored matrix is not touched by this function.
* \sa rankUpdate(w,sigma)
*/
void setZero()
{
m_isInitialized = false;
}
/** \returns a view of the upper triangular matrix U */
inline typename Traits::MatrixU matrixU() const
{
@@ -130,14 +142,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
}
/** \returns the coefficients of the diagonal matrix D */
inline Diagonal<const MatrixType> vectorD(void) const
inline Diagonal<const MatrixType> vectorD() const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_matrix.diagonal();
}
/** \returns true if the matrix is positive (semidefinite) */
inline bool isPositive(void) const
inline bool isPositive() const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_sign == 1;
@@ -158,10 +170,19 @@ template<typename _MatrixType, int _UpLo> class LDLT
}
/** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
*
* This function also supports in-place solves using the syntax <tt>x = decompositionObject.solve(x)</tt> .
*
* \note_about_checking_solutions
*
* \sa solveInPlace(), MatrixBase::ldlt()
* More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$
* by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$,
* \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
* \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
* computes the least-square solution of \f$ A x = b \f$ if \f$ A \f$ is singular.
*
* \sa MatrixBase::ldlt()
*/
template<typename Rhs>
inline const internal::solve_retval<LDLT, Rhs>
@@ -187,6 +208,9 @@ template<typename _MatrixType, int _UpLo> class LDLT
LDLT& compute(const MatrixType& matrix);
template <typename Derived>
LDLT& rankUpdate(const MatrixBase<Derived>& w,RealScalar alpha=1);
/** \returns the internal LDLT decomposition matrix
*
* TODO: document the storage layout
@@ -202,6 +226,17 @@ template<typename _MatrixType, int _UpLo> class LDLT
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
/** \brief Reports whether previous computation was successful.
*
* \returns \c Success. As written, this function always reports \c Success
* once the decomposition has been initialized; it contains no code path
* returning \c NumericalIssue.
*/
ComputationInfo info() const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return Success;
}
protected:
/** \internal
@@ -240,7 +275,7 @@ template<> struct ldlt_inplace<Lower>
return true;
}
RealScalar cutoff = 0, biggest_in_corner;
RealScalar cutoff(0), biggest_in_corner;
for (Index k = 0; k < size; ++k)
{
@@ -308,6 +343,61 @@ template<> struct ldlt_inplace<Lower>
return true;
}
// Reference for the algorithm: Davis and Hager, "Multiple Rank
// Modifications of a Sparse Cholesky Factorization" (Algorithm 1)
// Trivial rearrangements of their computations (Timothy E. Holy)
// allow their algorithm to work for rank-1 updates even if the
// original matrix is not of full rank.
// Here only rank-1 updates are implemented, to reduce the
// requirement for intermediate storage and improve accuracy
//
// In-place rank-1 update of a factorization stored in \a mat (diagonal read
// and written through coeff(j,j), sub-diagonal part of each column through
// col(j).tail(rs)).
//   mat   : square matrix holding the factorization; modified in place.
//   w     : update vector of the same size; it is overwritten while the
//           update proceeds, so callers must pass a scratch copy.
//   sigma : real scaling of the update (defaults to 1).
// Always returns true.
template<typename MatrixType, typename WDerived>
static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, typename MatrixType::RealScalar sigma=1)
{
using internal::isfinite;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
const Index size = mat.rows();
eigen_assert(mat.cols() == size && w.size()==size);
// Running scalar of the recurrence; starts at 1 and accumulates swj2/dj.
RealScalar alpha = 1;
// Apply the update
for (Index j = 0; j < size; j++)
{
// Check for termination due to an original decomposition of low-rank
// (presumably alpha becomes non-finite after a division by a zero dj below)
if (!isfinite(alpha))
break;
// Update the diagonal terms
RealScalar dj = real(mat.coeff(j,j));
Scalar wj = w.coeff(j);
RealScalar swj2 = sigma*abs2(wj);
// gamma is computed from the old alpha, before alpha is advanced below
RealScalar gamma = dj*alpha + swj2;
mat.coeffRef(j,j) += swj2/alpha;
alpha += swj2/dj;
// Update the terms of L
Index rs = size-j-1;
// w is updated first: the column update below uses the new w.tail(rs)
w.tail(rs) -= wj * mat.col(j).tail(rs);
// Skip the column update when gamma is zero to avoid dividing by it
if(gamma != 0)
mat.col(j).tail(rs) += (sigma*conj(wj)/gamma)*w.tail(rs);
}
return true;
}
// Rank-1 update entry point for lower-stored factorizations: permutes \a w
// into the workspace \a tmp and runs the in-place kernel on that copy
// (the kernel overwrites its vector argument, so \a w itself is left intact).
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, typename MatrixType::RealScalar sigma=1)
{
tmp = transpositions * w;
const bool updated = ldlt_inplace<Lower>::updateInPlace(mat, tmp, sigma);
return updated;
}
};
template<> struct ldlt_inplace<Upper>
@@ -318,22 +408,29 @@ template<> struct ldlt_inplace<Upper>
Transpose<MatrixType> matt(mat);
return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
}
// Rank-1 update for upper-stored factorizations: forwards to the Lower
// implementation on a transposed view of \a mat, with \a w conjugated.
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, typename MatrixType::RealScalar sigma=1)
{
Transpose<MatrixType> transposedView(mat);
return ldlt_inplace<Lower>::update(transposedView, transpositions, tmp, w.conjugate(), sigma);
}
};
template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
{
typedef TriangularView<MatrixType, UnitLower> MatrixL;
typedef TriangularView<typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m; }
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
typedef TriangularView<const MatrixType, UnitLower> MatrixL;
typedef TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m; }
static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
};
template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
{
typedef TriangularView<typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
typedef TriangularView<MatrixType, UnitUpper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
inline static MatrixU getU(const MatrixType& m) { return m; }
typedef TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
typedef TriangularView<const MatrixType, UnitUpper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
static inline MatrixU getU(const MatrixType& m) { return m; }
};
} // end namespace internal
@@ -358,6 +455,37 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
return *this;
}
/** Update the LDLT decomposition: given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T.
* \param w a vector to be incorporated into the decomposition.
* \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1.
* \returns a reference to *this
*
* If no decomposition is currently stored, the factorization is first reset to
* that of a zero matrix of the size of \a w with identity transpositions, and
* the sign reported by isPositive() is taken from the sign of \a sigma.
* \sa setZero()
*/
template<typename MatrixType, int _UpLo>
template<typename Derived>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w,typename NumTraits<typename MatrixType::Scalar>::Real sigma)
{
const Index size = w.rows();
if (m_isInitialized)
{
eigen_assert(m_matrix.rows()==size);
}
else
{
// Start from the decomposition of the zero matrix
m_matrix.resize(size,size);
m_matrix.setZero();
// Identity transpositions: entry i swaps i with itself
m_transpositions.resize(size);
for (Index i = 0; i < size; i++)
m_transpositions.coeffRef(i) = i;
m_temporary.resize(size);
// Store only the sign of sigma: isPositive() tests m_sign == 1, so storing
// the raw scalar would misreport any positive sigma different from 1.
m_sign = sigma>=0 ? 1 : -1;
m_isInitialized = true;
}
internal::ldlt_inplace<UpLo>::update(m_matrix, m_transpositions, m_temporary, w, sigma);
return *this;
}
namespace internal {
template<typename _MatrixType, int _UpLo, typename Rhs>
struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
@@ -376,7 +504,21 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
dec().matrixL().solveInPlace(dst);
// dst = D^-1 (L^-1 P b)
dst = dec().vectorD().asDiagonal().inverse() * dst;
// more precisely, use pseudo-inverse of D (see bug 241)
using std::abs;
using std::max;
typedef typename LDLTType::MatrixType MatrixType;
typedef typename LDLTType::Scalar Scalar;
typedef typename LDLTType::RealScalar RealScalar;
const Diagonal<const MatrixType> vectorD = dec().vectorD();
RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits<Scalar>::epsilon(),
RealScalar(1) / NumTraits<RealScalar>::highest()); // motivated by LAPACK's xGELSS
for (Index i = 0; i < vectorD.size(); ++i) {
if(abs(vectorD(i)) > tolerance)
dst.row(i) /= vectorD(i);
else
dst.row(i).setZero();
}
// dst = L^-T (D^-1 L^-1 P b)
dec().matrixU().solveInPlace(dst);

View File

@@ -29,13 +29,15 @@ namespace internal{
template<typename MatrixType, int UpLo> struct LLT_Traits;
}
/** \ingroup cholesky_Module
/** \ingroup Cholesky_Module
*
* \class LLT
*
* \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
*
* \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
* \param UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
* The other triangular part won't be read.
*
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
* matrix A such that A = LL^* = U^*U, where L is lower triangular.
@@ -49,6 +51,9 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
* use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations
* has a solution.
*
* Example: \include LLT_example.cpp
* Output: \verbinclude LLT_example.out
*
* \sa MatrixBase::llt(), class LDLT
*/
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
@@ -178,6 +183,9 @@ template<typename _MatrixType, int _UpLo> class LLT
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
template<typename VectorType>
LLT& rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
protected:
/** \internal
* Used to compute and store L
@@ -190,16 +198,15 @@ template<typename _MatrixType, int _UpLo> class LLT
namespace internal {
template<int UpLo> struct llt_inplace;
template<typename Scalar, int UpLo> struct llt_inplace;
template<> struct llt_inplace<Lower>
template<typename Scalar> struct llt_inplace<Scalar, Lower>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename MatrixType>
static typename MatrixType::Index unblocked(MatrixType& mat)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
eigen_assert(mat.rows()==mat.cols());
const Index size = mat.rows();
@@ -233,7 +240,7 @@ template<> struct llt_inplace<Lower>
Index blockSize = size/8;
blockSize = (blockSize/16)*16;
blockSize = std::min(std::max(blockSize,Index(8)), Index(128));
blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128));
for (Index k=0; k<size; k+=blockSize)
{
@@ -241,7 +248,7 @@ template<> struct llt_inplace<Lower>
// A00 | - | -
// lu = A10 | A11 | -
// A20 | A21 | A22
Index bs = std::min(blockSize, size-k);
Index bs = (std::min)(blockSize, size-k);
Index rs = size - k - bs;
Block<MatrixType,Dynamic,Dynamic> A11(m,k, k, bs,bs);
Block<MatrixType,Dynamic,Dynamic> A21(m,k+bs,k, rs,bs);
@@ -254,55 +261,133 @@ template<> struct llt_inplace<Lower>
}
return -1;
}
// In-place rank-one update of a lower Cholesky factor stored in \a mat, so
// that the factor corresponds to the original matrix plus sigma * vec * vec^*.
//   mat   : square matrix holding the factor; modified in place.
//   vec   : update vector, same dimension as mat (copied into a temporary).
//   sigma : real scaling; sigma > 0 selects the Givens-based path, any other
//           value the general path.
// Returns -1 on success, or the index j of the column at which the updated
// diagonal entry would not be strictly positive (general path only).
template<typename MatrixType, typename VectorType>
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::ColXpr ColXpr;
typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
typedef Matrix<Scalar,Dynamic,1> TempVectorType;
typedef typename TempVectorType::SegmentReturnType TempVecSegment;
// Use the matrix Index type throughout (not int) so that sizes are never
// narrowed and comparisons with Index quantities stay homogeneous.
const Index n = mat.cols();
eigen_assert(mat.rows()==n && vec.size()==n);
TempVectorType temp;
if(sigma>0)
{
// This version is based on Givens rotations.
// It is faster than the other one below, but only works for updates,
// i.e., for sigma > 0
temp = sqrt(sigma) * vec;
for(Index i=0; i<n; ++i)
{
JacobiRotation<Scalar> g;
g.makeGivens(mat(i,i), -temp(i), &mat(i,i));
Index rs = n-i-1;
if(rs>0)
{
ColXprSegment x(mat.col(i).tail(rs));
TempVecSegment y(temp.tail(rs));
apply_rotation_in_the_plane(x, y, g);
}
}
}
else
{
temp = vec;
RealScalar beta = 1;
for(Index j=0; j<n; ++j)
{
RealScalar Ljj = real(mat.coeff(j,j));
RealScalar dj = abs2(Ljj);
Scalar wj = temp.coeff(j);
RealScalar swj2 = sigma*abs2(wj);
// gamma uses the old beta, before beta is advanced below
RealScalar gamma = dj*beta + swj2;
RealScalar x = dj + swj2/beta;
// A non-positive squared diagonal means the update cannot proceed:
// report the offending column
if (x<=RealScalar(0))
return j;
RealScalar nLjj = sqrt(x);
mat.coeffRef(j,j) = nLjj;
beta += swj2/dj;
// Update the terms of L
Index rs = n-j-1;
if(rs)
{
temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);
if(gamma != 0)
mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*conj(wj)/gamma)*temp.tail(rs);
}
}
}
return -1;
}
};
template<> struct llt_inplace<Upper>
template<typename Scalar> struct llt_inplace<Scalar, Upper>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename MatrixType>
static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat)
{
Transpose<MatrixType> matt(mat);
return llt_inplace<Lower>::unblocked(matt);
return llt_inplace<Scalar, Lower>::unblocked(matt);
}
template<typename MatrixType>
static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat)
{
Transpose<MatrixType> matt(mat);
return llt_inplace<Lower>::blocked(matt);
return llt_inplace<Scalar, Lower>::blocked(matt);
}
template<typename MatrixType, typename VectorType>
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
{
Transpose<MatrixType> matt(mat);
return llt_inplace<Scalar, Lower>::rankUpdate(matt, vec.conjugate(), sigma);
}
};
template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
{
typedef TriangularView<MatrixType, Lower> MatrixL;
typedef TriangularView<typename MatrixType::AdjointReturnType, Upper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m; }
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
typedef TriangularView<const MatrixType, Lower> MatrixL;
typedef TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m; }
static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
static bool inplace_decomposition(MatrixType& m)
{ return llt_inplace<Lower>::blocked(m)==-1; }
{ return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }
};
template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
{
typedef TriangularView<typename MatrixType::AdjointReturnType, Lower> MatrixL;
typedef TriangularView<MatrixType, Upper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
inline static MatrixU getU(const MatrixType& m) { return m; }
typedef TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
typedef TriangularView<const MatrixType, Upper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
static inline MatrixU getU(const MatrixType& m) { return m; }
static bool inplace_decomposition(MatrixType& m)
{ return llt_inplace<Upper>::blocked(m)==-1; }
{ return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }
};
} // end namespace internal
/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
*
*
* \returns a reference to *this
*
* Example: \include TutorialLinAlgComputeTwice.cpp
* Output: \verbinclude TutorialLinAlgComputeTwice.out
*/
template<typename MatrixType, int _UpLo>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
assert(a.rows()==a.cols());
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);
m_matrix = a;
@@ -314,6 +399,26 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
return *this;
}
/** Performs a rank one update (or downdate) of the current decomposition.
* If A = LL^* before the rank one update,
* then after it we have LL^* = A + sigma * v v^* where \a v must be a vector
* of same dimension.
*
* The stored status is set to \c NumericalIssue when the in-place kernel
* returns a non-negative index, and to \c Success otherwise.
*
* \returns a reference to *this
*/
template<typename MatrixType, int _UpLo>
template<typename VectorType>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType);
eigen_assert(v.size()==m_matrix.cols());
eigen_assert(m_isInitialized);
const bool failed = internal::llt_inplace<typename MatrixType::Scalar, UpLo>::rankUpdate(m_matrix,v,sigma)>=0;
m_info = failed ? NumericalIssue : Success;
return *this;
}
namespace internal {
template<typename _MatrixType, int UpLo, typename Rhs>
struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
@@ -384,3 +489,4 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
}
#endif // EIGEN_LLT_H

View File

@@ -0,0 +1,123 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* LLt decomposition based on LAPACKE_?potrf function.
********************************************************************************
*/
#ifndef EIGEN_LLT_MKL_H
#define EIGEN_LLT_MKL_H
#include "Eigen/src/Core/util/MKL_support.h"
#include <iostream>
namespace internal {
template<typename Scalar> struct mkl_llt;
/* Generates, for the scalar type EIGTYPE, the MKL-backed specializations:
 *  - mkl_llt<EIGTYPE>::potrf   : calls LAPACKE_<MKLPREFIX>potrf on the matrix storage.
 *  - llt_inplace<EIGTYPE,Lower/Upper>::blocked    : Cholesky factorization through potrf.
 *  - llt_inplace<EIGTYPE,Lower/Upper>::rankUpdate : rank-one update of the factor.
 *
 * Return convention (the one tested by the callers of blocked()/rankUpdate(),
 * which compare against -1 and >=0 respectively): -1 on success, otherwise a
 * non-negative index.
 * For potrf, a positive LAPACK info i (leading minor of order i not positive
 * definite) is mapped to i-1, and a negative info (illegal argument) to size.
 *
 * rankUpdate accepts the scaling factor sigma (defaulting to 1, which
 * reproduces the former two-argument behaviour) and handles sigma <= 0 with
 * the same recurrence as the generic llt_inplace implementation.
 */
#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \
template<> struct mkl_llt<EIGTYPE> \
{ \
template<typename MatrixType> \
static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \
{ \
lapack_int matrix_order; \
lapack_int size, lda, info, StorageOrder; \
EIGTYPE* a; \
eigen_assert(m.rows()==m.cols()); \
/* Set up parameters for ?potrf */ \
size = m.rows(); \
StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
a = &(m.coeffRef(0,0)); \
lda = m.outerStride(); \
\
info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
/* Translate LAPACK's info into the llt_inplace convention (-1 == success) */ \
info = (info==0) ? -1 : (info>0 ? info-1 : size); \
return info; \
} \
}; \
template<> struct llt_inplace<EIGTYPE, Lower> \
{ \
template<typename MatrixType> \
static typename MatrixType::Index blocked(MatrixType& m) \
{ \
return mkl_llt<EIGTYPE>::potrf(m, 'L'); \
} \
template<typename MatrixType, typename VectorType> \
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma = 1) \
{ \
typedef typename MatrixType::Index Index; \
typedef typename MatrixType::RealScalar RealScalar; \
typedef typename MatrixType::ColXpr ColXpr; \
typedef typename internal::remove_all<ColXpr>::type ColXprCleaned; \
typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; \
typedef typename MatrixType::Scalar Scalar; \
typedef Matrix<Scalar,Dynamic,1> TempVectorType; \
typedef typename TempVectorType::SegmentReturnType TempVecSegment; \
\
const Index n = mat.cols(); \
eigen_assert(mat.rows()==n && vec.size()==n); \
TempVectorType temp; \
\
if(sigma>0) \
{ \
/* Givens-rotation based path: only valid for sigma > 0 */ \
temp = sqrt(sigma) * vec; \
for(Index i=0; i<n; ++i) \
{ \
JacobiRotation<Scalar> g; \
g.makeGivens(mat(i,i), -temp(i), &mat(i,i)); \
\
Index rs = n-i-1; \
if(rs>0) \
{ \
ColXprSegment x(mat.col(i).tail(rs)); \
TempVecSegment y(temp.tail(rs)); \
apply_rotation_in_the_plane(x, y, g); \
} \
} \
} \
else \
{ \
/* General path (downdates): fails with the column index when the */ \
/* updated squared diagonal entry is not strictly positive         */ \
temp = vec; \
RealScalar beta = 1; \
for(Index j=0; j<n; ++j) \
{ \
RealScalar Ljj = real(mat.coeff(j,j)); \
RealScalar dj = abs2(Ljj); \
Scalar wj = temp.coeff(j); \
RealScalar swj2 = sigma*abs2(wj); \
RealScalar gamma = dj*beta + swj2; \
\
RealScalar x = dj + swj2/beta; \
if (x<=RealScalar(0)) \
return j; \
RealScalar nLjj = sqrt(x); \
mat.coeffRef(j,j) = nLjj; \
beta += swj2/dj; \
\
Index rs = n-j-1; \
if(rs) \
{ \
temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs); \
if(gamma != 0) \
mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*conj(wj)/gamma)*temp.tail(rs); \
} \
} \
} \
return -1; \
} \
}; \
template<> struct llt_inplace<EIGTYPE, Upper> \
{ \
template<typename MatrixType> \
static typename MatrixType::Index blocked(MatrixType& m) \
{ \
return mkl_llt<EIGTYPE>::potrf(m, 'U'); \
} \
template<typename MatrixType, typename VectorType> \
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma = 1) \
{ \
/* Upper storage: update the transposed view with the conjugated vector */ \
Transpose<MatrixType> matt(mat); \
return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate(), sigma); \
} \
};
EIGEN_MKL_LLT(double, double, d)
EIGEN_MKL_LLT(float, float, s)
EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z)
EIGEN_MKL_LLT(scomplex, MKL_Complex8, c)
}
#endif // EIGEN_LLT_MKL_H

View File

@@ -0,0 +1,6 @@
# Collect every header (*.h) of this directory and install it under
# Eigen/src/CholmodSupport as part of the Devel component.
FILE(GLOB Eigen_CholmodSupport_SRCS "*.h")
INSTALL(FILES
${Eigen_CholmodSupport_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel
)

View File

@@ -69,11 +69,20 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
res.nzmax = mat.nonZeros();
res.nrow = mat.rows();;
res.ncol = mat.cols();
res.p = mat._outerIndexPtr();
res.i = mat._innerIndexPtr();
res.x = mat._valuePtr();
res.p = mat.outerIndexPtr();
res.i = mat.innerIndexPtr();
res.x = mat.valuePtr();
res.sorted = 1;
res.packed = 1;
if(mat.isCompressed())
{
res.packed = 1;
}
else
{
res.packed = 0;
res.nz = mat.innerNonZeroPtr();
}
res.dtype = 0;
res.stype = -1;
@@ -149,7 +158,9 @@ enum CholmodMode {
CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt
};
/** \brief A Cholesky factorization and solver based on Cholmod
/** \ingroup CholmodSupport_Module
* \class CholmodDecomposition
* \brief A Cholesky factorization and solver based on Cholmod
*
* This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization
* using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
@@ -159,6 +170,9 @@ enum CholmodMode {
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
* or Upper. Default is Lower.
*
* This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
* \sa \ref TutorialSparseDirectSolvers
*/
template<typename _MatrixType, int _UpLo = Lower>
class CholmodDecomposition

View File

@@ -37,6 +37,9 @@
* API for the %Matrix class provides easy access to linear-algebra
* operations.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
*
* \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
*/
namespace internal {
@@ -65,10 +68,8 @@ class Array
friend struct internal::conservative_resize_like_impl;
using Base::m_storage;
public:
enum { NeedsToAlign = (!(Options&DontAlign))
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
using Base::base;
using Base::coeff;

View File

@@ -42,7 +42,10 @@ template<typename ExpressionType> class MatrixWrapper;
*
* This class is the base that is inherited by all array expression types.
*
* \param Derived is the derived type, e.g., an array or an expression type.
* \tparam Derived is the derived type, e.g., an array or an expression type.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
*
* \sa class MatrixBase, \ref TopicClassHierarchy
*/
@@ -156,7 +159,7 @@ template<typename Derived> class ArrayBase
/** \returns an \link MatrixBase Matrix \endlink expression of this array
* \sa MatrixBase::array() */
MatrixWrapper<Derived> matrix() { return derived(); }
const MatrixWrapper<Derived> matrix() const { return derived(); }
const MatrixWrapper<const Derived> matrix() const { return derived(); }
// template<typename Dest>
// inline void evalTo(Dest& dst) const { dst = matrix(); }
@@ -171,10 +174,10 @@ template<typename Derived> class ArrayBase
protected:
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
};
/** replaces \c *this by \c *this - \a other.

View File

@@ -53,16 +53,25 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
inline ArrayWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
inline Index rows() const { return m_expression.rows(); }
inline Index cols() const { return m_expression.cols(); }
inline Index outerStride() const { return m_expression.outerStride(); }
inline Index innerStride() const { return m_expression.innerStride(); }
inline const CoeffReturnType coeff(Index row, Index col) const
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
inline const Scalar* data() const { return m_expression.data(); }
inline CoeffReturnType coeff(Index row, Index col) const
{
return m_expression.coeff(row, col);
}
@@ -77,7 +86,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(row, col);
}
inline const CoeffReturnType coeff(Index index) const
inline CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
@@ -119,8 +128,14 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
template<typename Dest>
inline void evalTo(Dest& dst) const { dst = m_expression; }
const typename internal::remove_all<NestedExpressionType>::type&
nestedExpression() const
{
return m_expression;
}
protected:
const NestedExpressionType m_expression;
NestedExpressionType m_expression;
};
/** \class MatrixWrapper
@@ -151,16 +166,25 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
inline MatrixWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
inline Index rows() const { return m_expression.rows(); }
inline Index cols() const { return m_expression.cols(); }
inline Index outerStride() const { return m_expression.outerStride(); }
inline Index innerStride() const { return m_expression.innerStride(); }
inline const CoeffReturnType coeff(Index row, Index col) const
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
inline const Scalar* data() const { return m_expression.data(); }
inline CoeffReturnType coeff(Index row, Index col) const
{
return m_expression.coeff(row, col);
}
@@ -175,7 +199,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
return m_expression.derived().coeffRef(row, col);
}
inline const CoeffReturnType coeff(Index index) const
inline CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
@@ -214,8 +238,14 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
}
/** \returns a const reference to the expression stored in m_expression,
  * i.e. the object this wrapper forwards its coefficient accesses to. */
const typename internal::remove_all<NestedExpressionType>::type&
nestedExpression() const
{
return m_expression;
}
protected:
const NestedExpressionType m_expression;
NestedExpressionType m_expression;
};
#endif // EIGEN_ARRAYWRAPPER_H

View File

@@ -41,7 +41,7 @@ public:
DstIsAligned = Derived::Flags & AlignedBit,
DstHasDirectAccess = Derived::Flags & DirectAccessBit,
SrcIsAligned = OtherDerived::Flags & AlignedBit,
JointAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
};
private:
@@ -106,9 +106,9 @@ public:
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(NoUnrolling)
};
@@ -152,7 +152,7 @@ struct assign_DefaultTraversal_CompleteUnrolling
inner = Index % Derived1::InnerSizeAtCompileTime
};
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeffByOuterInner(outer, inner, src);
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
@@ -162,13 +162,13 @@ struct assign_DefaultTraversal_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_DefaultTraversal_InnerUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
{
dst.copyCoeffByOuterInner(outer, Index, src);
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
@@ -178,7 +178,7 @@ struct assign_DefaultTraversal_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
};
/***********************
@@ -188,7 +188,7 @@ struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_LinearTraversal_CompleteUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeff(Index, src);
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
@@ -198,7 +198,7 @@ struct assign_LinearTraversal_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
/**************************
@@ -214,7 +214,7 @@ struct assign_innervec_CompleteUnrolling
JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
assign_innervec_CompleteUnrolling<Derived1, Derived2,
@@ -225,13 +225,13 @@ struct assign_innervec_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_innervec_InnerUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
assign_innervec_InnerUnrolling<Derived1, Derived2,
@@ -242,7 +242,7 @@ struct assign_innervec_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
};
/***************************************************************************
@@ -251,24 +251,25 @@ struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2,
int Traversal = assign_traits<Derived1, Derived2>::Traversal,
int Unrolling = assign_traits<Derived1, Derived2>::Unrolling>
int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
int Version = Specialized>
struct assign_impl;
/************************
*** Default traversal ***
************************/
template<typename Derived1, typename Derived2, int Unrolling>
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
template<typename Derived1, typename Derived2, int Unrolling, int Version>
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
{
inline static void run(Derived1 &, const Derived2 &) { }
static inline void run(Derived1 &, const Derived2 &) { }
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
@@ -278,21 +279,21 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
@@ -305,11 +306,11 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
*** Linear traversal ***
***********************/
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
for(Index i = 0; i < size; ++i)
@@ -317,10 +318,10 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
@@ -331,11 +332,11 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
*** Inner vectorization ***
**************************/
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
@@ -346,21 +347,21 @@ struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
@@ -398,11 +399,11 @@ struct unaligned_assign_impl<false>
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
@@ -412,7 +413,7 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: first_aligned(&dst.coeffRef(0), size);
: internal::first_aligned(&dst.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
@@ -426,11 +427,11 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
enum { size = Derived1::SizeAtCompileTime,
packetSize = packet_traits<typename Derived1::Scalar>::size,
@@ -445,11 +446,11 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnroll
*** Slice vectorization ***
***************************/
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
enum {
@@ -463,7 +464,7 @@ struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
const Index outerSize = dst.outerSize();
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
: first_aligned(&dst.coeffRef(0,0), innerSize);
: internal::first_aligned(&dst.coeffRef(0,0), innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
@@ -474,7 +475,7 @@ struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
// do the vectorizable part of the assignment
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
dst.template copyPacketByOuterInner<Derived2, Aligned, Unaligned>(outer, inner, src);
dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src);
// do the non-vectorizable part of the assignment
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
@@ -531,19 +532,19 @@ struct assign_selector;
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,false,false> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,true,false> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,false,true> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,true,true> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
};
} // end namespace internal

217
Eigen/src/Core/Assign_MKL.h Normal file
View File

@@ -0,0 +1,217 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
********************************************************************************
*/
#ifndef EIGEN_ASSIGN_VML_H
#define EIGEN_ASSIGN_VML_H
namespace internal {
/** \internal
  * Primary template: a functor type \c Op has no VML counterpart unless a
  * specialization of vml_call says otherwise, hence IsSupported is 0 here.
  */
template<typename Op>
struct vml_call
{
  enum { IsSupported = 0 };
};
/** \internal
  * Compile-time traits deciding whether, and how, the assignment of
  * UnaryOp applied to \c Src into \c Dst can be handed over to a VML call.
  * The only public result is \c Traversal, which is one of
  * LinearVectorizedTraversal, InnerVectorizedTraversal or DefaultTraversal.
  */
template<typename Dst, typename Src, typename UnaryOp>
class vml_assign_traits
{
private:
enum {
DstHasDirectAccess = Dst::Flags & DirectAccessBit,
SrcHasDirectAccess = Src::Flags & DirectAccessBit,
StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
// NOTE(review): InnerSize is not referenced by any other enumerator of this class.
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
: int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
: int(Dst::RowsAtCompileTime),
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
: int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
// NOTE(review): despite its name this is initialised from Dst::SizeAtCompileTime,
// not Dst::MaxSizeAtCompileTime -- confirm this is intended.
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
// VML is a candidate only if the functor has a vml_call specialization, both sides
// share the storage order, offer direct access and have a compile-time inner stride of 1.
MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
&& Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
// A single call over all coefficients additionally needs LinearAccessBit on both sides.
MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
// Compile-time size used for the threshold test: the whole object when linearizable,
// otherwise one inner vector.
VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
// Sizes unknown at compile time (Dynamic) always pass the threshold test.
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
MayEnableVml = MightEnableVml && LargeEnough,
MayLinearize = MayEnableVml && MightLinearize
};
public:
enum {
// DefaultTraversal means "do not use VML".
Traversal = MayLinearize ? LinearVectorizedTraversal
: MayEnableVml ? InnerVectorizedTraversal
: DefaultTraversal
};
};
/** \internal
  * Generic case, selected when \c VmlTraversal (computed by vml_assign_traits)
  * matches none of the partial specializations of vml_assign_impl: VML is not
  * used and the work is delegated to assign_impl with Version = BuiltIn, which
  * bypasses the Specialized assign_impl specializations for CwiseUnaryOp
  * sources.
  */
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
struct vml_assign_impl
: assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
{
};
/** \internal
  * VML-backed assignment performed one inner vector at a time: for every outer
  * index a single vml_call<UnaryOp>::run() processes innerSize() coefficients.
  */
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
{
  typedef typename Derived1::Index Index;
  typedef typename Derived1::Scalar Scalar;

  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
  {
    // Should VML have to be skipped at runtime, the built-in path is:
    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    const Index innerLength = dst.innerSize();
    const Index outerCount = dst.outerSize();
    for(Index k = 0; k < outerCount; ++k)
    {
      // Address of the first coefficient of the k-th inner vector, on each side.
      const Scalar* in = src.IsRowMajor ? &(src.nestedExpression().coeffRef(k,0))
                                        : &(src.nestedExpression().coeffRef(0,k));
      Scalar* out = dst.IsRowMajor ? &(dst.coeffRef(k,0)) : &(dst.coeffRef(0,k));
      vml_call<UnaryOp>::run(src.functor(), innerLength, in, out);
    }
  }
};
/** \internal
  * VML-backed assignment performed with one call over all coefficients:
  * dst.size() values are read from src.nestedExpression().data() and the
  * results are written to dst.data().
  */
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
{
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
{
// in case we want to (or have to) skip VML at runtime we can call:
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
}
};
// Macros
/* Defines the Specialized version of assign_impl for a CwiseUnaryOp source and
 * the given (TRAVERSAL, UNROLLING) pair. Its run() simply forwards to
 * vml_assign_impl, which either issues VML calls or falls back to the BuiltIn
 * assign_impl. */
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
template<typename Derived1, typename Derived2, typename UnaryOp> \
struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
} \
};
// One Specialized assign_impl per traversal/unrolling combination listed here.
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
// Accuracy mode passed to the VML entry points that take a mode argument:
// VML_LA (MKL's low-accuracy mode) only when EIGEN_FAST_MATH is defined as 1,
// VML_HA (high accuracy) in every other case.
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define EIGEN_MKL_VML_MODE VML_HA
#else
#define EIGEN_MKL_VML_MODE VML_LA
#endif
/* Declares vml_call< scalar_<EIGENOP>_op<EIGENTYPE> >, marking the functor as
 * VML-supported. run() calls VMLOP(size, src, dst) after casting the EIGENTYPE
 * pointers to VMLTYPE pointers; this variant passes no accuracy-mode argument.
 * The functor argument carries no state needed here, so its name is commented
 * out to avoid an unused-parameter warning in every generated specialization. */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \
} \
};
/* Same as EIGEN_MKL_VML_DECLARE_UNARY_CALL, but for VML entry points taking a
 * trailing accuracy-mode argument: run() calls VMLOP(size, src, dst, mode)
 * with mode set to EIGEN_MKL_VML_MODE.
 * The functor argument is not needed, so its name is commented out to avoid an
 * unused-parameter warning in every generated specialization. */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \
} \
};
/* Declares vml_call for a power functor with a constant exponent. The exponent
 * is read from func.m_exponent and copied to a local so that its address can
 * be passed; VMLOP receives every scalar argument (size, exponent, mode) by
 * address, and the mode is EIGEN_MKL_VML_MODE. */
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
EIGENTYPE exponent = func.m_exponent; \
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \
(VMLTYPE*)dst, &vmlMode); \
} \
};
/* Convenience macros declaring vml_call for a whole family of scalar types.
 * Variants without the _LA suffix use EIGEN_MKL_VML_DECLARE_UNARY_CALL and
 * prefix the VML name with vs (float), vd (double), vc (scomplex as
 * MKL_Complex8) or vz (dcomplex as MKL_Complex16). */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
/* The _LA variants use EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA (mode-taking entry
 * points) with the prefixes vms, vmd, vmc and vmz for the same four types. */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
// Functors routed to VML. Each line pairs an Eigen scalar_<name>_op with the
// VML routine name; the _LA lines cover float, double, scomplex and dcomplex.
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
// abs is currently left disabled:
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
// square is declared for float and double only, through the mode-less entry points.
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
// pow with a constant exponent, one declaration per scalar type.
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
} // end namespace internal
#endif // EIGEN_ASSIGN_VML_H

View File

@@ -87,7 +87,7 @@ class BandMatrixBase : public EigenBase<Derived>
if (i<=supers())
{
start = supers()-i;
len = std::min(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
len = (std::min)(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
}
else if (i>=rows()-subs())
len = std::max<Index>(0,coeffs().rows() - (i + 1 - rows() + subs()));
@@ -96,11 +96,11 @@ class BandMatrixBase : public EigenBase<Derived>
/** \returns a vector expression of the main diagonal */
inline Block<CoefficientsType,1,SizeAtCompileTime> diagonal()
{ return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,std::min(rows(),cols())); }
{ return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
/** \returns a vector expression of the main diagonal (const version) */
inline const Block<const CoefficientsType,1,SizeAtCompileTime> diagonal() const
{ return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,std::min(rows(),cols())); }
{ return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
template<int Index> struct DiagonalIntReturnType {
enum {
@@ -122,13 +122,13 @@ class BandMatrixBase : public EigenBase<Derived>
/** \returns a vector expression of the \a N -th sub or super diagonal */
template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal()
{
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, std::max(0,N), 1, diagonalLength(N));
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
}
/** \returns a vector expression of the \a N -th sub or super diagonal */
template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const
{
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, std::max(0,N), 1, diagonalLength(N));
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
}
/** \returns a vector expression of the \a i -th sub or super diagonal */
@@ -166,7 +166,7 @@ class BandMatrixBase : public EigenBase<Derived>
protected:
inline Index diagonalLength(Index i) const
{ return i<0 ? std::min(cols(),rows()+i) : std::min(rows(),cols()-i); }
{ return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); }
};
/**
@@ -180,7 +180,7 @@ class BandMatrixBase : public EigenBase<Derived>
* \param Cols Number of columns, or \b Dynamic
* \param Supers Number of super diagonals
* \param Subs Number of sub diagonals
* \param _Options A combination of either \b RowMajor or \b ColMajor, and of \b SelfAdjoint
* \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
* The former controls \ref TopicStorageOrders "storage order", and defaults to
* column-major. The latter controls whether the matrix represents a selfadjoint
* matrix in which case either Supers or Subs have to be null.
@@ -284,6 +284,7 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
: m_coeffs(coeffs),
m_rows(rows), m_supers(supers), m_subs(subs)
{
EIGEN_UNUSED_VARIABLE(cols);
//internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
}

View File

@@ -94,7 +94,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess>
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
@@ -242,6 +242,21 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
inline Index outerStride() const;
#endif
/** \returns a const reference to the nested expression stored in m_xpr */
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
{
return m_xpr;
}
/** \returns the value held by m_startRow (presumably the index, in the nested
  * expression, of the first row of this block -- TODO confirm) */
Index startRow() const
{
return m_startRow.value();
}
/** \returns the value held by m_startCol (presumably the index, in the nested
  * expression, of the first column of this block -- TODO confirm) */
Index startCol() const
{
return m_startCol.value();
}
protected:
const typename XprType::Nested m_xpr;
@@ -304,6 +319,11 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
init();
}
/** \returns a const reference to the nested expression stored in m_xpr */
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
{
return m_xpr;
}
/** \sa MapBase::innerStride() */
inline Index innerStride() const
{
@@ -341,7 +361,7 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
: m_xpr.innerStride();
}
const typename XprType::Nested m_xpr;
typename XprType::Nested m_xpr;
int m_outerStride;
};

View File

@@ -35,7 +35,7 @@ struct all_unroller
row = (UnrollCount-1) % Derived::RowsAtCompileTime
};
inline static bool run(const Derived &mat)
static inline bool run(const Derived &mat)
{
return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
}
@@ -44,13 +44,13 @@ struct all_unroller
template<typename Derived>
struct all_unroller<Derived, 1>
{
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
};
template<typename Derived>
struct all_unroller<Derived, Dynamic>
{
inline static bool run(const Derived &) { return false; }
static inline bool run(const Derived &) { return false; }
};
template<typename Derived, int UnrollCount>
@@ -61,7 +61,7 @@ struct any_unroller
row = (UnrollCount-1) % Derived::RowsAtCompileTime
};
inline static bool run(const Derived &mat)
static inline bool run(const Derived &mat)
{
return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
}
@@ -70,13 +70,13 @@ struct any_unroller
template<typename Derived>
struct any_unroller<Derived, 1>
{
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
};
template<typename Derived>
struct any_unroller<Derived, Dynamic>
{
inline static bool run(const Derived &) { return false; }
static inline bool run(const Derived &) { return false; }
};
} // end namespace internal

View File

@@ -167,8 +167,8 @@ class CwiseBinaryOp : internal::no_assignment_operator,
const BinaryOp& functor() const { return m_functor; }
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
LhsNested m_lhs;
RhsNested m_rhs;
const BinaryOp m_functor;
};

View File

@@ -101,6 +101,9 @@ class CwiseNullaryOp : internal::no_assignment_operator,
return m_functor.packetOp(index);
}
/** \returns the functor representing the nullary operation */
const NullaryOp& functor() const { return m_functor; }
protected:
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
@@ -742,7 +745,7 @@ struct setIdentity_impl<Derived, true>
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
{
m.setZero();
const Index size = std::min(m.rows(), m.cols());
const Index size = (std::min)(m.rows(), m.cols());
for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
return m;
}

View File

@@ -95,7 +95,7 @@ class CwiseUnaryOp : internal::no_assignment_operator,
nestedExpression() { return m_xpr.const_cast_derived(); }
protected:
const typename XprType::Nested m_xpr;
typename XprType::Nested m_xpr;
const UnaryOp m_functor;
};

View File

@@ -97,7 +97,7 @@ class CwiseUnaryView : internal::no_assignment_operator,
protected:
// FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
const typename internal::nested<MatrixType>::type m_matrix;
typename internal::nested<MatrixType>::type m_matrix;
ViewOp m_functor;
};

View File

@@ -34,7 +34,10 @@
* This class is the base that is inherited by all dense objects (matrix, vector, arrays,
* and related expression types). The common Eigen API for dense objects is contained in this class.
*
* \param Derived is the derived type, e.g., a matrix type or an expression.
* \tparam Derived is the derived type, e.g., a matrix type or an expression.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
*
* \sa \ref TopicClassHierarchy
*/
@@ -53,7 +56,13 @@ template<typename Derived> class DenseBase
class InnerIterator;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index; /**< The type of indices */
/** \brief The type of indices
* \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
* \sa \ref TopicPreprocessorDirectives.
*/
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -367,12 +376,13 @@ template<typename Derived> class DenseBase
inline Derived& operator*=(const Scalar& other);
inline Derived& operator/=(const Scalar& other);
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
/** \returns the matrix or vector obtained by evaluating this expression.
*
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
* a const reference, in order to avoid a useless copy.
*/
EIGEN_STRONG_INLINE const typename internal::eval<Derived>::type eval() const
EIGEN_STRONG_INLINE EvalReturnType eval() const
{
// Even though MSVC does not honor strong inlining when the return type
// is a dynamic matrix, we desperately need strong inlining for fixed

View File

@@ -35,7 +35,7 @@ template<typename T> struct add_const_on_value_type_if_arithmetic
/** \brief Base class providing read-only coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam ReadOnlyAccessors Constant indicating read-only access
* \tparam #ReadOnlyAccessors Constant indicating read-only access
*
* This class defines the \c operator() \c const function and friends, which can be used to read specific
* entries of a matrix or array.
@@ -212,7 +212,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit.
*
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@@ -239,7 +239,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit and the LinearAccessBit.
*
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@@ -275,7 +275,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
/** \brief Base class providing read/write coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam WriteAccessors Constant indicating read/write access
* \tparam #WriteAccessors Constant indicating read/write access
*
* This class defines the non-const \c operator() function and friends, which can be used to write specific
* entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
@@ -433,7 +433,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit.
*
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@@ -567,7 +567,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
/** \brief Base class providing direct read-only coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam DirectAccessors Constant indicating direct access
* \tparam #DirectAccessors Constant indicating direct access
*
* This class defines functions to work with strides which can be used to access entries directly. This class
* inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
@@ -637,7 +637,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
/** \brief Base class providing direct read/write coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam DirectAccessors Constant indicating direct access
* \tparam #DirectWriteAccessors Constant indicating direct access
*
* This class defines functions to work with strides which can be used to access entries directly. This class
* inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
@@ -710,16 +710,16 @@ namespace internal {
template<typename Derived, bool JustReturnZero>
struct first_aligned_impl
{
inline static typename Derived::Index run(const Derived&)
static inline typename Derived::Index run(const Derived&)
{ return 0; }
};
template<typename Derived>
struct first_aligned_impl<Derived, false>
{
inline static typename Derived::Index run(const Derived& m)
static inline typename Derived::Index run(const Derived& m)
{
return first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
}
};
@@ -729,7 +729,7 @@ struct first_aligned_impl<Derived, false>
* documentation.
*/
template<typename Derived>
inline static typename Derived::Index first_aligned(const Derived& m)
static inline typename Derived::Index first_aligned(const Derived& m)
{
return first_aligned_impl
<Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>

View File

@@ -58,14 +58,14 @@ struct plain_array
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
eigen_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
&& "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html" \
"http://eigen.tuxfamily.org/dox-devel/TopicUnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****");
#endif
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
{
EIGEN_ALIGN16 T array[Size];
EIGEN_USER_ALIGN16 T array[Size];
plain_array() { EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf) }
plain_array(constructor_without_unaligned_array_assert) {}
};
@@ -73,7 +73,7 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 16>
template <typename T, int MatrixOrArrayOptions, int Alignment>
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
{
EIGEN_ALIGN16 T array[1];
EIGEN_USER_ALIGN16 T array[1];
plain_array() {}
plain_array(constructor_without_unaligned_array_assert) {}
};
@@ -104,8 +104,8 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
: m_data(internal::constructor_without_unaligned_array_assert()) {}
inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
inline static DenseIndex rows(void) {return _Rows;}
inline static DenseIndex cols(void) {return _Cols;}
static inline DenseIndex rows(void) {return _Rows;}
static inline DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
inline const T *data() const { return m_data.array; }
@@ -120,14 +120,24 @@ template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0
inline DenseStorage(internal::constructor_without_unaligned_array_assert) {}
inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
inline void swap(DenseStorage& ) {}
inline static DenseIndex rows(void) {return _Rows;}
inline static DenseIndex cols(void) {return _Cols;}
static inline DenseIndex rows(void) {return _Rows;}
static inline DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
inline const T *data() const { return 0; }
inline T *data() { return 0; }
};
// More specializations for null matrices (Size == 0); these are necessary to
// resolve ambiguities, presumably between the Size == 0 specialization and the
// specializations taking a Dynamic number of rows and/or columns.
// Each one adds no member of its own and simply derives from
// DenseStorage<T, 0, 0, 0, _Options>.
// NOTE(review): the base is instantiated with 0 rows and 0 columns, so any
// static rows()/cols() inherited from it would report 0 even when _Rows or
// _Cols is a fixed non-zero value here -- confirm that this is intended.
template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };
template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };
template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };
// dynamic-size matrix with fixed-size storage
template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
{
@@ -241,7 +251,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
inline static DenseIndex rows(void) {return _Rows;}
static inline DenseIndex rows(void) {return _Rows;}
inline DenseIndex cols(void) const {return m_cols;}
inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex cols)
{
@@ -278,7 +288,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
inline DenseIndex rows(void) const {return m_rows;}
inline static DenseIndex cols(void) {return _Cols;}
static inline DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);

View File

@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -87,7 +88,7 @@ template<typename MatrixType, int DiagIndex> class Diagonal
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
inline Index rows() const
{ return m_index.value()<0 ? std::min(m_matrix.cols(),m_matrix.rows()+m_index.value()) : std::min(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
{ return m_index.value()<0 ? (std::min)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
inline Index cols() const { return 1; }
@@ -101,6 +102,15 @@ template<typename MatrixType, int DiagIndex> class Diagonal
return 0;
}
typedef typename internal::conditional<
internal::is_lvalue<MatrixType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
inline Scalar& coeffRef(Index row, Index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
@@ -133,8 +143,19 @@ template<typename MatrixType, int DiagIndex> class Diagonal
return m_matrix.coeff(index+rowOffset(), index+colOffset());
}
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
return m_matrix;
}
int index() const
{
return m_index.value();
}
protected:
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
const internal::variable_if_dynamic<Index, DiagIndex> m_index;
private:

View File

@@ -72,7 +72,7 @@ class DiagonalBase : public EigenBase<Derived>
const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
operator*(const MatrixBase<MatrixDerived> &matrix) const;
inline const DiagonalWrapper<CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
inverse() const
{
return diagonal().cwiseInverse();
@@ -251,13 +251,13 @@ class DiagonalWrapper
#endif
/** Constructor from expression of diagonal coefficients to wrap. */
inline DiagonalWrapper(const DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}
inline DiagonalWrapper(DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}
/** \returns a const reference to the wrapped expression of diagonal coefficients. */
const DiagonalVectorType& diagonal() const { return m_diagonal; }
protected:
const typename DiagonalVectorType::Nested m_diagonal;
typename DiagonalVectorType::Nested m_diagonal;
};
/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients

View File

@@ -107,8 +107,8 @@ class DiagonalProduct : internal::no_assignment_operator,
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
}
const typename MatrixType::Nested m_matrix;
const typename DiagonalType::Nested m_diagonal;
typename MatrixType::Nested m_matrix;
typename DiagonalType::Nested m_diagonal;
};
/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.

View File

@@ -116,7 +116,9 @@ MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
//---------- implementation of L2 norm and related functions ----------
/** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself.
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm.
* In both cases, it consists in the sum of the squares of all the matrix entries.
* For vectors, this is also equal to the dot product of \c *this with itself.
*
* \sa dot(), norm()
*/
@@ -126,7 +128,9 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
return internal::real((*this).cwiseAbs2().sum());
}
/** \returns the \em l2 norm of *this, i.e., for vectors, the square root of the dot product of *this with itself.
/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
* In both cases, it consists in the square root of the sum of the squares of all the matrix entries.
* For vectors, this is also equal to the square root of the dot product of \c *this with itself.
*
* \sa dot(), squaredNorm()
*/
@@ -172,7 +176,7 @@ template<typename Derived, int p>
struct lpNorm_selector
{
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
inline static RealScalar run(const MatrixBase<Derived>& m)
static inline RealScalar run(const MatrixBase<Derived>& m)
{
return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
}
@@ -181,7 +185,7 @@ struct lpNorm_selector
template<typename Derived>
struct lpNorm_selector<Derived, 1>
{
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.cwiseAbs().sum();
}
@@ -190,7 +194,7 @@ struct lpNorm_selector<Derived, 1>
template<typename Derived>
struct lpNorm_selector<Derived, 2>
{
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.norm();
}
@@ -199,7 +203,7 @@ struct lpNorm_selector<Derived, 2>
template<typename Derived>
struct lpNorm_selector<Derived, Infinity>
{
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.cwiseAbs().maxCoeff();
}

View File

@@ -116,7 +116,7 @@ struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
*/
template<typename Scalar> struct scalar_min_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::min(a, b); }
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmin(a,b); }
@@ -139,7 +139,7 @@ struct functor_traits<scalar_min_op<Scalar> > {
*/
template<typename Scalar> struct scalar_max_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::max(a, b); }
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmax(a,b); }
@@ -165,8 +165,10 @@ template<typename Scalar> struct scalar_hypot_op {
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
{
Scalar p = std::max(_x, _y);
Scalar q = std::min(_x, _y);
using std::max;
using std::min;
Scalar p = (max)(_x, _y);
Scalar q = (min)(_x, _y);
Scalar qp = q/p;
return p * sqrt(Scalar(1) + qp*qp);
}
@@ -218,6 +220,38 @@ struct functor_traits<scalar_quotient_op<Scalar> > {
};
};
/** \internal
* \brief Functor computing the logical AND of two booleans
*
* Unlike most functors in this file, this one is not a template: both operands
* and the result are plain \c bool.
*
* \sa class CwiseBinaryOp, ArrayBase::operator&&
*/
struct scalar_boolean_and_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
};
// Traits of scalar_boolean_and_op: the cost is taken to be that of one bool
// addition, and no packet (vectorized) evaluation is offered.
template<> struct functor_traits<scalar_boolean_and_op> {
enum {
Cost = NumTraits<bool>::AddCost,
PacketAccess = false
};
};
/** \internal
* \brief Functor computing the logical OR of two booleans
*
* Unlike most functors in this file, this one is not a template: both operands
* and the result are plain \c bool.
*
* \sa class CwiseBinaryOp, ArrayBase::operator||
*/
struct scalar_boolean_or_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
};
// Traits of scalar_boolean_or_op: the cost is taken to be that of one bool
// addition, and no packet (vectorized) evaluation is offered.
template<> struct functor_traits<scalar_boolean_or_op> {
enum {
Cost = NumTraits<bool>::AddCost,
PacketAccess = false
};
};
// unary functors:
/** \internal
@@ -605,7 +639,7 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
{
eigen_assert(col==0 || row==0);
return impl(col + row);
return impl.packetOp(col + row);
}
// This proxy object handles the actual required temporaries, the different
@@ -669,7 +703,7 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
/** \internal
* \brief Template functor to compute the cosine of a scalar
* \sa class CwiseUnaryOp, Cwise::cos()
* \sa class CwiseUnaryOp, ArrayBase::cos()
*/
template<typename Scalar> struct scalar_cos_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
@@ -688,7 +722,7 @@ struct functor_traits<scalar_cos_op<Scalar> >
/** \internal
* \brief Template functor to compute the sine of a scalar
* \sa class CwiseUnaryOp, Cwise::sin()
* \sa class CwiseUnaryOp, ArrayBase::sin()
*/
template<typename Scalar> struct scalar_sin_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
@@ -708,7 +742,7 @@ struct functor_traits<scalar_sin_op<Scalar> >
/** \internal
* \brief Template functor to compute the tan of a scalar
* \sa class CwiseUnaryOp, Cwise::tan()
* \sa class CwiseUnaryOp, ArrayBase::tan()
*/
template<typename Scalar> struct scalar_tan_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
@@ -725,6 +759,44 @@ struct functor_traits<scalar_tan_op<Scalar> >
};
};
/** \internal
* \brief Template functor to compute the arc cosine of a scalar
*
* The scalar overload forwards to \c acos, the packet overload to
* internal::pacos.
*
* \sa class CwiseUnaryOp, ArrayBase::acos()
*/
template<typename Scalar> struct scalar_acos_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
// NOTE(review): acos is called unqualified, so which overload is picked
// depends on what is visible at this point for the given Scalar -- confirm.
inline const Scalar operator() (const Scalar& a) const { return acos(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
};
// Traits of scalar_acos_op: the cost is estimated as five scalar
// multiplications; packet evaluation is enabled only when
// packet_traits<Scalar>::HasACos says so.
template<typename Scalar>
struct functor_traits<scalar_acos_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasACos
};
};
/** \internal
* \brief Template functor to compute the arc sine of a scalar
*
* The scalar overload forwards to \c asin, the packet overload to
* internal::pasin.
*
* \sa class CwiseUnaryOp, ArrayBase::asin()
*/
template<typename Scalar> struct scalar_asin_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
// NOTE(review): asin is called unqualified, so which overload is picked
// depends on what is visible at this point for the given Scalar -- confirm.
inline const Scalar operator() (const Scalar& a) const { return asin(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
};
// Traits of scalar_asin_op: the cost is estimated as five scalar
// multiplications; packet evaluation is enabled only when
// packet_traits<Scalar>::HasASin says so.
template<typename Scalar>
struct functor_traits<scalar_asin_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasASin
};
};
/** \internal
* \brief Template functor to raise a scalar to a power
* \sa class CwiseUnaryOp, Cwise::pow

View File

@@ -34,9 +34,10 @@ struct isApprox_selector
{
static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
{
const typename internal::nested<Derived,2>::type nested(x);
const typename internal::nested<OtherDerived,2>::type otherNested(y);
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * std::min(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
using std::min;
typename internal::nested<Derived,2>::type nested(x);
typename internal::nested<OtherDerived,2>::type otherNested(y);
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
}
};

View File

@@ -0,0 +1,624 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_GENERAL_PRODUCT_H
#define EIGEN_GENERAL_PRODUCT_H
/** \class GeneralProduct
* \ingroup Core_Module
*
* \brief Expression of the product of two general matrices or vectors
*
* \tparam Lhs the type of the left-hand side
* \tparam Rhs the type of the right-hand side
* \tparam ProductType the kind of product; defaults to internal::product_type<Lhs,Rhs>::value
*
* This class represents an expression of the product of two general matrices.
* We call a general matrix a dense matrix with full storage. For instance,
* this excludes triangular, selfadjoint, and sparse matrices.
* It is the return type of the operator* between general matrices. Its template
* arguments are determined automatically by ProductReturnType. Therefore,
* GeneralProduct should never be used directly. To determine the result type of a
* function which involves a matrix product, use ProductReturnType::Type.
*
* \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
class GeneralProduct;
// Size categories used by internal::product_size_category to classify one
// dimension of a product. A dimension equal to 1 is represented there by the
// literal value 1, which is why these two constants start at 2.
enum {
Large = 2,
Small = 3
};
namespace internal {
template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
{
enum { is_large = MaxSize == Dynamic ||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
value = is_large ? Large
: Size == 1 ? 1
: Small
};
};
/** \internal
* Selects, at compile time, the kind of product to use for Lhs * Rhs.
*
* The three dimensions of the product (rows taken from Lhs, columns taken from
* Rhs, and the depth shared by both) are each classified through
* product_size_category; the resulting triple is then mapped to a product kind
* by product_type_selector, whose \c ret is exposed here as \c value.
*/
template<typename Lhs, typename Rhs> struct product_type
{
// Operand types after remove_all; every compile-time size below is read from them.
typedef typename remove_all<Lhs>::type _Lhs;
typedef typename remove_all<Rhs>::type _Rhs;
enum {
MaxRows = _Lhs::MaxRowsAtCompileTime,
Rows = _Lhs::RowsAtCompileTime,
MaxCols = _Rhs::MaxColsAtCompileTime,
Cols = _Rhs::ColsAtCompileTime,
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
_Rhs::MaxRowsAtCompileTime),
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
_Rhs::RowsAtCompileTime),
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
};
// The splitting into separate steps below (the *_select enumerators and the
// selector typedef) works around an internal compiler error with gcc 4.1 and 4.2.
// Do not merge these steps into a single expression.
private:
enum {
rows_select = product_size_category<Rows,MaxRows>::value,
cols_select = product_size_category<Cols,MaxCols>::value,
depth_select = product_size_category<Depth,MaxDepth>::value
};
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
public:
enum {
// The selected product kind, as given by product_type_selector.
value = selector::ret
};
#ifdef EIGEN_DEBUG_PRODUCT
// Debugging aid, only compiled when EIGEN_DEBUG_PRODUCT is defined: passes the
// sizes, their categories and the selected product kind to EIGEN_DEBUG_VAR.
static void debug()
{
EIGEN_DEBUG_VAR(Rows);
EIGEN_DEBUG_VAR(Cols);
EIGEN_DEBUG_VAR(Depth);
EIGEN_DEBUG_VAR(rows_select);
EIGEN_DEBUG_VAR(cols_select);
EIGEN_DEBUG_VAR(depth_select);
EIGEN_DEBUG_VAR(value);
}
#endif
};
/* The following specializations select the kind of product at compile time
* based on the three dimensions of the product.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types}.
* The template arguments are, in this order, the size categories of the rows,
* of the columns and of the depth (see product_type, which instantiates the selector).
*
* The two partial specializations below cover the depth==1 case and the
* rows==1 && cols==1 case. The triple <1,1,1> matches both of them, so it needs
* its own full specialization to avoid an ambiguity. Likewise, the full
* specializations with a depth of 1 take precedence over the generic <M,N,1> one. */
// FIXME I'm not sure the current mapping is the ideal one.
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal
/** \class ProductReturnType
* \ingroup Core_Module
*
* \brief Helper class to get the correct and optimized return type of operator*
*
* \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side
* \param ProductType the kind of product; callers normally omit it and let it be determined
* automatically (NOTE(review): this comment used to credit internal::product_mode, while the
* debug hook in operator* refers to internal::product_type - confirm against the forward declaration)
*
* This class defines the typename Type representing the optimized product expression
* between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
* is the recommended way to define the result type of a function returning an expression
* which involves a matrix product. The class Product should never be
* used directly.
*
* This primary template maps every product kind that has no dedicated specialization
* to GeneralProduct<Lhs,Rhs,ProductType>.
*
* \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType>
struct ProductReturnType
{
// TODO use the nested type to reduce instantiations ????
// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
};
// Return type used when the coefficient-based product is selected
// (ProductType == CoeffBasedProductMode). Both operands are first passed through
// internal::nested, and the resulting CoeffBasedProduct is flagged with
// EvalBeforeAssigningBit | EvalBeforeNestingBit.
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
{
// The second argument given to internal::nested is the other operand's compile-time dimension.
// NOTE(review): presumably the number of times each coefficient gets read - confirm in internal::nested.
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
};
// Return type used for the lazy coefficient-based product
// (ProductType == LazyCoeffBasedProductMode). The operands are nested exactly as in the
// CoeffBasedProductMode case, but the CoeffBasedProduct is flagged with NestByRefBit
// instead of the EvalBefore* bits.
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
};
// LazyProductReturnType<Lhs,Rhs> is ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
// under a dedicated name; its nested Type is the return type of MatrixBase::lazyProduct().
// It is a struct deriving from the specialization rather than a direct use of it:
// this is a workaround for sun CC
template<typename Lhs, typename Rhs>
struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{};
/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/
// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
// product ends up to a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
namespace internal {
// Traits of an inner product: identical to the traits of a 1x1 Matrix whose scalar type is
// the ReturnType of scalar_product_traits for the two operand scalar types.
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
: traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
{};
}
// Inner (row-vector times column-vector) product.
// The object is itself a 1x1 Matrix: the constructor computes the product immediately and
// stores it in coefficient (0,0). Assignment is disabled through no_assignment_operator.
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, InnerProduct>
: internal::no_assignment_operator,
public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
{
typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
public:
// Evaluates the product eagerly as the sum of the coefficient-wise product of lhs^T and rhs.
// Compilation fails if the two operands do not share the same RealScalar type.
GeneralProduct(const Lhs& lhs, const Rhs& rhs)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
}
/** Conversion to scalar: returns the single stored coefficient. */
operator const typename Base::Scalar() const {
return Base::coeff(0,0);
}
};
/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/
namespace internal {
// Storage-order dependent implementation of the outer product; it is specialized for
// ColMajor and RowMajor after the GeneralProduct<..., OuterProduct> class.
template<int StorageOrder> struct outer_product_selector;
// Traits of an outer product are inherited from the traits of its ProductBase.
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
{};
}
// Outer (column-vector times row-vector) product expression.
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
{
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
// Stores both operands in the base class; compilation fails if they do not share the same
// RealScalar type.
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
// Adds alpha * (lhs * rhs) to dest. The loop direction is chosen from the storage order of
// the destination: row by row when Dest has RowMajorBit set, column by column otherwise.
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{
internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
}
};
namespace internal {
// Outer product accumulated into a column-major destination, one column at a time.
template<> struct outer_product_selector<ColMajor> {
  // For each column c of dest: dest.col(c) += (alpha * rhs(c)) * lhs.
  template<typename ProductType, typename Dest>
  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
    typedef typename Dest::Index Index;
    // FIXME make sure lhs is sequentially stored
    // FIXME not very good if rhs is real and lhs complex while alpha is real too
    const Index colCount = dest.cols();
    Index c = 0;
    while (c < colCount)
    {
      dest.col(c) += (alpha * prod.rhs().coeff(c)) * prod.lhs();
      ++c;
    }
  }
};
// Outer product accumulated into a row-major destination, one row at a time.
template<> struct outer_product_selector<RowMajor> {
  // For each row r of dest: dest.row(r) += (alpha * lhs(r)) * rhs.
  template<typename ProductType, typename Dest>
  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
    typedef typename Dest::Index Index;
    // FIXME make sure rhs is sequentially stored
    // FIXME not very good if lhs is real and rhs complex while alpha is real too
    const Index rowCount = dest.rows();
    Index r = 0;
    while (r < rowCount)
    {
      dest.row(r) += (alpha * prod.lhs().coeff(r)) * prod.rhs();
      ++r;
    }
  }
};
} // end namespace internal
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
/* According to the shape/flags of the matrix we have to distinguish 3 different cases:
* 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
* 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
* 3 - all other cases are handled using a simple loop along the outer-storage direction.
* Therefore we need a lower level meta selector.
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
*/
namespace internal {
// Traits of a matrix-vector product are inherited from the traits of its ProductBase.
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
{};
// The lower level meta selector: Side tells on which side the vector is, StorageOrder is the
// storage order of the matrix operand, and BlasCompatible tells whether that operand offers
// usable direct access to its coefficients.
template<int Side, int StorageOrder, bool BlasCompatible>
struct gemv_selector;
} // end namespace internal
// Matrix-vector (or vector-matrix) product expression.
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemvProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
{
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
// Stores both operands in the base class. The scalar-type consistency check is
// intentionally left disabled below.
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
// Side is OnTheLeft when the lhs is the vector (the matrix is then the rhs), OnTheRight otherwise.
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
// MatrixType is the nested type of whichever operand is the matrix.
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
// Adds alpha * (lhs * rhs) to dst. The implementation is selected at compile time from the
// side of the vector, the storage order of the matrix operand, and whether that operand has
// usable direct access according to blas_traits.
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
}
};
namespace internal {
// The vector is on the left => transposition
// The product v * A is rewritten as the transposed product A^T * v^T, written into the
// transpose of dest, and handed to the OnTheRight selector with the opposite storage order.
template<int StorageOrder, bool BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
// View of dest as its transpose; the forwarded selector writes through it.
Transpose<Dest> destT(dest);
// Transposing the matrix operand flips its storage order.
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
(prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
}
};
// Optional fixed-size scratch buffer used by the BLAS-compatible gemv selectors.
// Cond tells whether a buffer might be needed at all; data() returns the buffer address, or
// a null pointer when no static storage is provided.
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
// Cond == false: no storage is embedded. data() only exists so that callers compile; it is
// guarded by an internal assertion because it is not expected to be reached.
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};
// Cond == true but MaxSize is Dynamic: no compile-time bound is available, so no static
// storage is embedded and data() returns a null pointer.
// NOTE(review): callers pass this pointer to ei_declare_aligned_stack_constructed_variable,
// which presumably allocates when given null - confirm in that macro's definition.
template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
EIGEN_STRONG_INLINE Scalar* data() { return 0; }
};
// Cond == true with a compile-time MaxSize: the scratch buffer is embedded as a plain_array
// member holding EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize) scalars.
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
#if EIGEN_ALIGN_STATICALLY
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
enum {
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
PacketSize = internal::packet_traits<Scalar>::size
};
// When vectorization is enabled, PacketSize extra scalars are reserved as slack for the
// alignment adjustment performed in data().
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
// Round the array address down to a multiple of 16 bytes, then step 16 bytes forward.
// NOTE(review): this relies on the PacketSize slack covering at least 16 bytes, i.e. on
// 16-byte packets - confirm for every supported packet type.
return ForceAlignment
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
: m_data.array;
}
#endif
};
// Matrix * vector with a column-major matrix offering direct (BLAS-like) access.
//
// run() accumulates alpha * lhs * rhs into dest by forwarding to
// general_matrix_vector_product. The kernel is always given a destination with unit inner
// stride, so a temporary is used whenever dest cannot be written directly:
//  - dest has a compile-time inner stride different from 1, or
//  - the lhs is complex, the rhs is real and the effective alpha has a non-zero imaginary
//    part (the kernel receives its scaling factor as an RhsScalar, which cannot hold it).
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
  template<typename ProductType, typename Dest>
  static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename ProductType::Index Index;
    typedef typename ProductType::LhsScalar LhsScalar;
    typedef typename ProductType::RhsScalar RhsScalar;
    typedef typename ProductType::Scalar ResScalar;
    typedef typename ProductType::RealScalar RealScalar;
    typedef typename ProductType::ActualLhsType ActualLhsType;
    typedef typename ProductType::ActualRhsType ActualRhsType;
    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;

    // Strip the operands down to what the blas traits extract, and fold the scalar factors
    // they carried into alpha.
    ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
    ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());

    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
      EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
      MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
    };

    // Static scratch storage, only materialized when a temporary might be needed.
    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;

    bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;

    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);

    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
                                                  evalToDest ? dest.data() : static_dest.data());

    if(!evalToDest)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      // Keep the full Index width: an int would truncate the size where Index is wider.
      Index size = dest.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
      if(!alphaIsCompatible)
      {
        // The kernel runs with a unit factor into a zeroed temporary; the complex alpha is
        // applied afterwards when the temporary is added to dest.
        MappedDest(actualDestPtr, dest.size()).setZero();
        compatibleAlpha = RhsScalar(1);
      }
      else
        MappedDest(actualDestPtr, dest.size()) = dest;
    }

    general_matrix_vector_product
      <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
        actualLhs.data(), actualLhs.outerStride(),
        actualRhs.data(), actualRhs.innerStride(),
        actualDestPtr, 1,
        compatibleAlpha);

    if (!evalToDest)
    {
      if(!alphaIsCompatible)
        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
      else
        dest = MappedDest(actualDestPtr, dest.size());
    }
  }
};
// Matrix * vector with a row-major matrix offering direct (BLAS-like) access.
//
// run() accumulates alpha * lhs * rhs into dest by forwarding to
// general_matrix_vector_product. The kernel is always given a rhs with unit inner stride,
// so the rhs is first copied into a temporary when its compile-time inner stride is not 1.
// dest is written in place with its own inner stride.
template<> struct gemv_selector<OnTheRight,RowMajor,true>
{
  template<typename ProductType, typename Dest>
  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename ProductType::LhsScalar LhsScalar;
    typedef typename ProductType::RhsScalar RhsScalar;
    typedef typename ProductType::Scalar ResScalar;
    typedef typename ProductType::Index Index;
    typedef typename ProductType::ActualLhsType ActualLhsType;
    typedef typename ProductType::ActualRhsType ActualRhsType;
    typedef typename ProductType::_ActualRhsType _ActualRhsType;
    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;

    // Strip the operands down to what the blas traits extract, and fold the scalar factors
    // they carried into alpha.
    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());

    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
    };

    // Static scratch storage, only materialized when the rhs has to be copied.
    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;

    // The const_cast only adapts the pointer type expected by the macro.
    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());

    if(!DirectlyUseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      // Keep the full Index width: an int would truncate the size where Index is wider.
      Index size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
      // Pack the strided rhs into the contiguous temporary.
      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }

    general_matrix_vector_product
      <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
        actualLhs.data(), actualLhs.outerStride(),
        actualRhsPtr, 1,
        dest.data(), dest.innerStride(),
        actualAlpha);
  }
};
// Matrix * vector fallback for a column-major matrix without direct access:
// dest is accumulated as a linear combination of the columns of the lhs.
template<> struct gemv_selector<OnTheRight,ColMajor,false>
{
  // For each coefficient k of the rhs: dest += (alpha * rhs(k)) * lhs.col(k).
  template<typename ProductType, typename Dest>
  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename Dest::Index Index;
    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
    const Index depth = prod.rhs().rows();
    Index k = 0;
    while(k < depth)
    {
      dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
      ++k;
    }
  }
};
// Matrix * vector fallback for a row-major matrix without direct access:
// each coefficient of dest receives one row-times-vector dot product.
template<> struct gemv_selector<OnTheRight,RowMajor,false>
{
  // For each row r of the product: dest(r) += alpha * sum(lhs.row(r) .* rhs^T).
  template<typename ProductType, typename Dest>
  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename Dest::Index Index;
    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
    const Index rowCount = prod.rows();
    Index r = 0;
    while(r < rowCount)
    {
      dest.coeffRef(r) += alpha * (prod.lhs().row(r).cwiseProduct(prod.rhs().transpose())).sum();
      ++r;
    }
  }
};
} // end namespace internal
/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/
/** \returns the matrix product of \c *this and \a other.
*
* The returned object is a product expression built from \c derived() and \c other.derived();
* its type is ProductReturnType<Derived,OtherDerived>::Type.
*
* Compilation fails with a dedicated static assertion when the number of columns of \c *this
* and the number of rows of \a other are both known at compile time and differ.
*
* \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
*
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
*/
template<typename Derived>
template<typename OtherDerived>
inline const typename ProductReturnType<Derived, OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
// A note regarding the function declaration: In MSVC, this function will sometimes
// not be inlined since DenseStorage is an unwindable object for dynamic
// matrices and product types are holding a member to store the result.
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
enum {
// The inner dimensions agree, or at least one of them is only known at run time.
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|| OtherDerived::RowsAtCompileTime==Dynamic
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
};
// note to the lost user:
// * for a dot product use: v1.dot(v2)
// * for a coeff-wise product use: v1.cwiseProduct(v2)
// The three assertions below only differ in the message reported for an invalid product:
// two vectors of the same size, two same-sized non-vector operands, or anything else.
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
#ifdef EIGEN_DEBUG_PRODUCT
internal::product_type<Derived,OtherDerived>::debug();
#endif
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
*
* The returned product will behave like any other expression: the coefficients of the product will be
* computed one at a time, as requested. This might be useful in some extremely rare cases when only
* a small, non-coherent fraction of the result's coefficients has to be computed.
*
* The same compile-time size checks as in operator*() are performed.
*
* \warning This version of the matrix product can be much much slower. So use it only if you know
* what you are doing and you have measured a true speed improvement.
*
* \sa operator*(const MatrixBase&)
*/
template<typename Derived>
template<typename OtherDerived>
const typename LazyProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
enum {
// The inner dimensions agree, or at least one of them is only known at run time.
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|| OtherDerived::RowsAtCompileTime==Dynamic
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
};
// note to the lost user:
// * for a dot product use: v1.dot(v2)
// * for a coeff-wise product use: v1.cwiseProduct(v2)
// The three assertions below only differ in the message reported for an invalid product.
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
#endif // EIGEN_PRODUCT_H

View File

@@ -134,12 +134,12 @@ pdiv(const Packet& a,
/** \internal \returns the min of \a a and \a b (coeff-wise) */
template<typename Packet> inline Packet
pmin(const Packet& a,
const Packet& b) { return std::min(a, b); }
const Packet& b) { using std::min; return (min)(a, b); }
/** \internal \returns the max of \a a and \a b (coeff-wise) */
template<typename Packet> inline Packet
pmax(const Packet& a,
const Packet& b) { return std::max(a, b); }
const Packet& b) { using std::max; return (max)(a, b); }
/** \internal \returns the absolute value of \a a */
template<typename Packet> inline Packet
@@ -225,15 +225,20 @@ template<typename Packet> inline typename unpacket_traits<Packet>::type predux_m
template<typename Packet> inline Packet preverse(const Packet& a)
{ return a; }
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
template<typename Packet> inline Packet pcplxflip(const Packet& a)
{ return Packet(imag(a),real(a)); }
/**************************
* Special math functions
***************************/
/** \internal \returns the sin of \a a (coeff-wise) */
/** \internal \returns the sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet psin(const Packet& a) { return sin(a); }
/** \internal \returns the cos of \a a (coeff-wise) */
/** \internal \returns the cosine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pcos(const Packet& a) { return cos(a); }
@@ -241,6 +246,14 @@ Packet pcos(const Packet& a) { return cos(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ptan(const Packet& a) { return tan(a); }
/** \internal \returns the arc sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pasin(const Packet& a) { return asin(a); }
/** \internal \returns the arc cosine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pacos(const Packet& a) { return acos(a); }
/** \internal \returns the exp of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pexp(const Packet& a) { return exp(a); }
@@ -257,6 +270,14 @@ Packet psqrt(const Packet& a) { return sqrt(a); }
* The following functions might not have to be overwritten for vectorized types
***************************************************************************/
/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
template<typename Packet>
inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
{
pstore(to, pset1<Packet>(a));
}
/** \internal \returns a * b + c (coeff-wise) */
template<typename Packet> inline Packet
pmadd(const Packet& a,
@@ -265,7 +286,7 @@ pmadd(const Packet& a,
{ return padd(pmul(a, b),c); }
/** \internal \returns a packet version of \a *from.
* \If LoadMode equals Aligned, \a from must be 16 bytes aligned */
* If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
template<typename Packet, int LoadMode>
inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
{
@@ -276,7 +297,7 @@ inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
}
/** \internal copy the packet \a from to \a *to.
* If StoreMode equals Aligned, \a to must be 16 bytes aligned */
* If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet, int LoadMode>
inline void pstoret(Scalar* to, const Packet& from)
{
@@ -291,7 +312,7 @@ template<int Offset,typename PacketType>
struct palign_impl
{
// by default data are aligned, so there is nothing to be done :)
inline static void run(PacketType&, const PacketType&) {}
static inline void run(PacketType&, const PacketType&) {}
};
/** \internal update \a first using the concatenation of the \a Offset last elements

View File

@@ -56,6 +56,8 @@ namespace std
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(imag,scalar_imag_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sin,scalar_sin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(cos,scalar_cos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(asin,scalar_asin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(acos,scalar_acos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(tan,scalar_tan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(exp,scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(log,scalar_log_op)
@@ -77,6 +79,8 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(sin,scalar_sin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(cos,scalar_cos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(asin,scalar_asin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(acos,scalar_acos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(tan,scalar_tan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(exp,scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(log,scalar_log_op)

View File

@@ -141,7 +141,8 @@ struct significant_decimals_default_impl
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline int run()
{
return cast<RealScalar,int>(std::ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
using std::ceil;
return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
}
};
@@ -170,7 +171,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
return s;
}
const typename Derived::Nested m = _m;
typename Derived::Nested m = _m;
typedef typename Derived::Scalar Scalar;
typedef typename Derived::Index Index;

View File

@@ -31,10 +31,10 @@
*
* \brief A matrix or vector expression mapping an existing array of data.
*
* \param PlainObjectType the equivalent matrix type of the mapped data
* \param MapOptions specifies whether the pointer is \c Aligned, or \c Unaligned.
* The default is \c Unaligned.
* \param StrideType optionnally specifies strides. By default, Map assumes the memory layout
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
* of an ordinary, contiguous array. This can be overridden by specifying strides.
* The type passed here must be a specialization of the Stride template, see examples below.
*
@@ -72,9 +72,9 @@
* Example: \include Map_placement_new.cpp
* Output: \verbinclude Map_placement_new.out
*
* This class is the return type of Matrix::Map() but can also be used directly.
* This class is the return type of PlainObjectBase::Map() but can also be used directly.
*
* \sa Matrix::Map(), \ref TopicStorageOrders
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
*/
namespace internal {
@@ -95,7 +95,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
HasNoInnerStride = InnerStrideAtCompileTime == 1,
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = int(int(MapOptions)&Aligned)==Aligned,
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
@@ -192,14 +192,14 @@ template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int
inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
::Array(const Scalar *data)
{
_set_noalias(Eigen::Map<const Array>(data));
this->_set_noalias(Eigen::Map<const Array>(data));
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
::Matrix(const Scalar *data)
{
_set_noalias(Eigen::Map<const Matrix>(data));
this->_set_noalias(Eigen::Map<const Matrix>(data));
}
#endif // EIGEN_MAP_H

View File

@@ -85,6 +85,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
using Base::rowStride;
using Base::colStride;
// bug 217 - compile error on ICC 11.1
using Base::operator=;
typedef typename Base::CoeffReturnType CoeffReturnType;
@@ -168,8 +170,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % (sizeof(Scalar)*internal::packet_traits<Scalar>::size)) == 0)
&& "data is not aligned");
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
&& "data is not aligned");
}
PointerType m_data;
@@ -236,7 +238,7 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
(this->m_data + index * innerStride(), x);
}
inline MapBase(PointerType data) : Base(data) {}
explicit inline MapBase(PointerType data) : Base(data) {}
inline MapBase(PointerType data, Index size) : Base(data, size) {}
inline MapBase(PointerType data, Index rows, Index cols) : Base(data, rows, cols) {}

View File

@@ -87,7 +87,8 @@ struct real_impl<std::complex<RealScalar> >
{
static inline RealScalar run(const std::complex<RealScalar>& x)
{
return std::real(x);
using std::real;
return real(x);
}
};
@@ -122,7 +123,8 @@ struct imag_impl<std::complex<RealScalar> >
{
static inline RealScalar run(const std::complex<RealScalar>& x)
{
return std::imag(x);
using std::imag;
return imag(x);
}
};
@@ -244,7 +246,8 @@ struct conj_impl<std::complex<RealScalar> >
{
static inline std::complex<RealScalar> run(const std::complex<RealScalar>& x)
{
return std::conj(x);
using std::conj;
return conj(x);
}
};
@@ -270,7 +273,8 @@ struct abs_impl
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x)
{
return std::abs(x);
using std::abs;
return abs(x);
}
};
@@ -305,7 +309,7 @@ struct abs2_impl<std::complex<RealScalar> >
{
static inline RealScalar run(const std::complex<RealScalar>& x)
{
return std::norm(x);
return real(x)*real(x) + imag(x)*imag(x);
}
};
@@ -369,10 +373,12 @@ struct hypot_impl
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x, const Scalar& y)
{
using std::max;
using std::min;
RealScalar _x = abs(x);
RealScalar _y = abs(y);
RealScalar p = std::max(_x, _y);
RealScalar q = std::min(_x, _y);
RealScalar p = (max)(_x, _y);
RealScalar q = (min)(_x, _y);
RealScalar qp = q/p;
return p * sqrt(RealScalar(1) + qp*qp);
}
@@ -420,7 +426,8 @@ struct sqrt_default_impl
{
static inline Scalar run(const Scalar& x)
{
return std::sqrt(x);
using std::sqrt;
return sqrt(x);
}
};
@@ -454,194 +461,36 @@ inline EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x)
}
/****************************************************************************
* Implementation of exp *
* Implementation of standard unary real functions (exp, log, sin, cos, ...) *
****************************************************************************/
template<typename Scalar, bool IsInteger>
struct exp_default_impl
{
static inline Scalar run(const Scalar& x)
{
return std::exp(x);
// This macro instantiates all the template machinery that is common to all unary real functions.
#define EIGEN_MATHFUNC_STANDARD_REAL_UNARY(NAME) \
template<typename Scalar, bool IsInteger> struct NAME##_default_impl { \
static inline Scalar run(const Scalar& x) { using std::NAME; return NAME(x); } \
}; \
template<typename Scalar> struct NAME##_default_impl<Scalar, true> { \
static inline Scalar run(const Scalar&) { \
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) \
return Scalar(0); \
} \
}; \
template<typename Scalar> struct NAME##_impl \
: NAME##_default_impl<Scalar, NumTraits<Scalar>::IsInteger> \
{}; \
template<typename Scalar> struct NAME##_retval { typedef Scalar type; }; \
template<typename Scalar> \
inline EIGEN_MATHFUNC_RETVAL(NAME, Scalar) NAME(const Scalar& x) { \
return EIGEN_MATHFUNC_IMPL(NAME, Scalar)::run(x); \
}
};
template<typename Scalar>
struct exp_default_impl<Scalar, true>
{
static inline Scalar run(const Scalar&)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
return Scalar(0);
}
};
template<typename Scalar>
struct exp_impl : exp_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct exp_retval
{
typedef Scalar type;
};
template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(exp, Scalar) exp(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(exp, Scalar)::run(x);
}
/****************************************************************************
* Implementation of cos *
****************************************************************************/
template<typename Scalar, bool IsInteger>
struct cos_default_impl
{
static inline Scalar run(const Scalar& x)
{
return std::cos(x);
}
};
template<typename Scalar>
struct cos_default_impl<Scalar, true>
{
static inline Scalar run(const Scalar&)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
return Scalar(0);
}
};
template<typename Scalar>
struct cos_impl : cos_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct cos_retval
{
typedef Scalar type;
};
template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(cos, Scalar) cos(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(cos, Scalar)::run(x);
}
/****************************************************************************
* Implementation of sin *
****************************************************************************/
template<typename Scalar, bool IsInteger>
struct sin_default_impl
{
static inline Scalar run(const Scalar& x)
{
return std::sin(x);
}
};
template<typename Scalar>
struct sin_default_impl<Scalar, true>
{
static inline Scalar run(const Scalar&)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
return Scalar(0);
}
};
template<typename Scalar>
struct sin_impl : sin_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct sin_retval
{
typedef Scalar type;
};
template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(sin, Scalar) sin(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(sin, Scalar)::run(x);
}
/****************************************************************************
* Implementation of tan *
****************************************************************************/
template<typename Scalar, bool IsInteger>
struct tan_default_impl
{
static inline Scalar run(const Scalar& x)
{
return std::tan(x);
}
};
template<typename Scalar>
struct tan_default_impl<Scalar, true>
{
static inline Scalar run(const Scalar&)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
return Scalar(0);
}
};
template<typename Scalar>
struct tan_impl : tan_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct tan_retval
{
typedef Scalar type;
};
template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(tan, Scalar) tan(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(tan, Scalar)::run(x);
}
/****************************************************************************
* Implementation of log *
****************************************************************************/
template<typename Scalar, bool IsInteger>
struct log_default_impl
{
static inline Scalar run(const Scalar& x)
{
return std::log(x);
}
};
template<typename Scalar>
struct log_default_impl<Scalar, true>
{
static inline Scalar run(const Scalar&)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
return Scalar(0);
}
};
template<typename Scalar>
struct log_impl : log_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct log_retval
{
typedef Scalar type;
};
template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(log, Scalar) log(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(log, Scalar)::run(x);
}
EIGEN_MATHFUNC_STANDARD_REAL_UNARY(exp)
EIGEN_MATHFUNC_STANDARD_REAL_UNARY(log)
EIGEN_MATHFUNC_STANDARD_REAL_UNARY(sin)
EIGEN_MATHFUNC_STANDARD_REAL_UNARY(cos)
EIGEN_MATHFUNC_STANDARD_REAL_UNARY(tan)
EIGEN_MATHFUNC_STANDARD_REAL_UNARY(asin)
EIGEN_MATHFUNC_STANDARD_REAL_UNARY(acos)
/****************************************************************************
* Implementation of atan2 *
@@ -653,7 +502,8 @@ struct atan2_default_impl
typedef Scalar retval;
static inline Scalar run(const Scalar& x, const Scalar& y)
{
return std::atan2(x, y);
using std::atan2;
return atan2(x, y);
}
};
@@ -692,7 +542,8 @@ struct pow_default_impl
typedef Scalar retval;
static inline Scalar run(const Scalar& x, const Scalar& y)
{
return std::pow(x, y);
using std::pow;
return pow(x, y);
}
};
@@ -701,7 +552,7 @@ struct pow_default_impl<Scalar, true>
{
static inline Scalar run(Scalar x, Scalar y)
{
Scalar res = 1;
Scalar res(1);
eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
if(y & 1) res *= x;
y >>= 1;
@@ -884,7 +735,8 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
}
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
return abs(x - y) <= std::min(abs(x), abs(y)) * prec;
using std::min;
return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
}
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
@@ -922,7 +774,8 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
}
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
return abs2(x - y) <= std::min(abs2(x), abs2(y)) * prec * prec;
using std::min;
return abs2(x - y) <= (min)(abs2(x), abs2(y)) * prec * prec;
}
};
@@ -984,6 +837,17 @@ template<> struct scalar_fuzzy_impl<bool>
};
/****************************************************************************
* Special functions *
****************************************************************************/
// std::isfinite is not part of C++98/03, so let's define our own version,
// even though it is not very efficient.
//
// Returns true if x lies within [NumTraits<T>::lowest(), NumTraits<T>::highest()].
// Both bounds are inclusive: the extreme representable values are finite themselves
// (for an integer type, every value lies in this range). For floating-point types,
// NaN fails every ordered comparison and +/-infinity falls outside the range, so
// both are reported as non-finite.
// NOTE(review): this assumes highest()/lowest() return the largest/smallest finite
// value of T, as the generic NumTraits implementation does - confirm for custom scalar types.
template<typename T> bool isfinite(const T& x)
{
  return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
}
} // end namespace internal
#endif // EIGEN_MATHFUNCTIONS_H

View File

@@ -43,8 +43,8 @@
* \tparam _Cols Number of columns, or \b Dynamic
*
* The remaining template parameters are optional -- in most cases you don't have to worry about them.
* \tparam _Options \anchor matrix_tparam_options A combination of either \b RowMajor or \b ColMajor, and of either
* \b AutoAlign or \b DontAlign.
* \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
* \b #AutoAlign or \b #DontAlign.
* The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
* for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
* \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
@@ -79,6 +79,9 @@
* m(0, 3) = 3;
* \endcode
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
*
* <i><b>Some notes:</b></i>
*
* <dl>
@@ -150,10 +153,6 @@ class Matrix
typedef typename Base::PlainObject PlainObject;
enum { NeedsToAlign = (!(Options&DontAlign))
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
using Base::base;
using Base::coeffRef;
@@ -412,25 +411,6 @@ EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
#undef EIGEN_MAKE_TYPEDEFS
#undef EIGEN_MAKE_TYPEDEFS_LARGE
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
using Eigen::Vector##SizeSuffix##TypeSuffix; \
using Eigen::RowVector##SizeSuffix##TypeSuffix;
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
#define EIGEN_USING_MATRIX_TYPEDEFS \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
#undef EIGEN_MAKE_FIXED_TYPEDEFS
#endif // EIGEN_MATRIX_H

View File

@@ -38,7 +38,7 @@
* Note that some methods are defined in other modules such as the \ref LU_Module LU module
* for all functions related to matrix inversions.
*
* \param Derived is the derived type, e.g. a matrix type, or an expression, etc.
* \tparam Derived is the derived type, e.g. a matrix type, or an expression, etc.
*
* When writing a function taking Eigen objects as argument, if you want your function
* to take as argument any matrix, vector, or expression, just let it take a
@@ -53,6 +53,9 @@
}
* \endcode
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived> class MatrixBase
@@ -108,7 +111,7 @@ template<typename Derived> class MatrixBase
/** \returns the size of the main diagonal, which is min(rows(),cols()).
* \sa rows(), cols(), SizeAtCompileTime. */
inline Index diagonalSize() const { return std::min(rows(),cols()); }
inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
/** \brief The plain matrix type corresponding to this expression.
*
@@ -327,7 +330,7 @@ template<typename Derived> class MatrixBase
/** \returns an \link ArrayBase Array \endlink expression of this matrix
* \sa ArrayBase::matrix() */
ArrayWrapper<Derived> array() { return derived(); }
const ArrayWrapper<Derived> array() const { return derived(); }
const ArrayWrapper<const Derived> array() const { return derived(); }
/////////// LU module ///////////
@@ -462,6 +465,8 @@ template<typename Derived> class MatrixBase
const MatrixFunctionReturnValue<Derived> sinh() const;
const MatrixFunctionReturnValue<Derived> cos() const;
const MatrixFunctionReturnValue<Derived> sin() const;
const MatrixSquareRootReturnValue<Derived> sqrt() const;
const MatrixLogarithmReturnValue<Derived> log() const;
#ifdef EIGEN2_SUPPORT
template<typename ProductDerived, typename Lhs, typename Rhs>
@@ -508,10 +513,10 @@ template<typename Derived> class MatrixBase
protected:
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
};
#endif // EIGEN_MATRIXBASE_H

View File

@@ -81,14 +81,14 @@ template<typename T> struct GenericNumTraits
>::type NonInteger;
typedef T Nested;
inline static Real epsilon() { return std::numeric_limits<T>::epsilon(); }
inline static Real dummy_precision()
static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
static inline Real dummy_precision()
{
// make sure to override this for floating-point types
return Real(0);
}
inline static T highest() { return std::numeric_limits<T>::max(); }
inline static T lowest() { return IsInteger ? std::numeric_limits<T>::min() : (-std::numeric_limits<T>::max()); }
static inline T highest() { return (std::numeric_limits<T>::max)(); }
static inline T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
#ifdef EIGEN2_SUPPORT
enum {
@@ -104,12 +104,12 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
template<> struct NumTraits<float>
: GenericNumTraits<float>
{
inline static float dummy_precision() { return 1e-5f; }
static inline float dummy_precision() { return 1e-5f; }
};
template<> struct NumTraits<double> : GenericNumTraits<double>
{
inline static double dummy_precision() { return 1e-12; }
static inline double dummy_precision() { return 1e-12; }
};
template<> struct NumTraits<long double>
@@ -130,8 +130,8 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
};
inline static Real epsilon() { return NumTraits<Real>::epsilon(); }
inline static Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
};
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>

View File

@@ -511,7 +511,7 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
protected:
const typename IndicesType::Nested m_indices;
typename IndicesType::Nested m_indices;
};
/** \returns the matrix with the permutation applied to the columns.
@@ -608,7 +608,7 @@ struct permut_matrix_product_retval
protected:
const PermutationType& m_permutation;
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
};
/* Template partial specialization for transposed/inverse permutations */

View File

@@ -34,18 +34,56 @@
namespace internal {
/** \internal
  * Checks that \a rows and \a cols are non-negative and that their product is
  * representable by \c Index. Calls throw_std_bad_alloc() otherwise.
  */
template<typename Index>
EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
{
  // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
  // Largest positive value of Index: every bit set except the top one.
  // This assumes that Index is signed and that a byte has 8 bits.
  const Index largest = (size_t(1) << (8 * sizeof(Index) - 1)) - 1;

  // Negative dimensions are always rejected.
  bool invalid = rows < 0 || cols < 0;

  // An empty object cannot overflow; with two strictly positive dimensions,
  // rows*cols exceeds the largest index exactly when rows > largest/cols.
  if (!invalid && rows != 0 && cols != 0)
    invalid = rows > largest / cols;

  if (invalid)
    throw_std_bad_alloc();
}
template <typename Derived, typename OtherDerived = Derived, bool IsVector = static_cast<bool>(Derived::IsVectorAtCompileTime)> struct conservative_resize_like_impl;
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
} // end namespace internal
/**
/** \class PlainObjectBase
* \brief %Dense storage base class for matrices and arrays.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
*
* \sa \ref TopicClassHierarchy
*/
#ifdef EIGEN_PARSED_BY_DOXYGEN
namespace internal {
// This is a workaround for Doxygen not being able to understand the inheritance logic
// when it is hidden by the dense_xpr_base helper struct.
template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
/** This class is just a workaround for Doxygen and it does not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct dense_xpr_base_dispatcher_for_doxygen<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
/** This class is just a workaround for Doxygen and it does not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct dense_xpr_base_dispatcher_for_doxygen<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
} // namespace internal
template<typename Derived>
class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen<Derived>
#else
template<typename Derived>
class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
#endif
{
public:
enum { Options = internal::traits<Derived>::Options };
@@ -80,14 +118,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
protected:
DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
public:
enum { NeedsToAlign = (!(Options&DontAlign))
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
Base& base() { return *static_cast<Base*>(this); }
@@ -196,11 +232,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
{
  // Reject negative dimensions and rows*cols overflow before touching the storage;
  // this check is needed whatever the zero-initialization setting is.
  internal::check_rows_cols_for_overflow(rows, cols);
#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
  // Remember whether the number of coefficients changes: the zero-initialization
  // below is only performed in that case.
  const Index new_size = rows*cols;
  const bool size_changed = new_size != this->size();
  m_storage.resize(new_size, rows, cols);
  if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
#else
  m_storage.resize(rows*cols, rows, cols);
#endif
}
@@ -269,6 +307,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
{
const OtherDerived& other = _other.derived();
internal::check_rows_cols_for_overflow(other.rows(), other.cols());
const Index othersize = other.rows()*other.cols();
if(RowsAtCompileTime == 1)
{
@@ -283,33 +322,47 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
else resize(other.rows(), other.cols());
}
/** Resizes \c *this to a \a rows x \a cols matrix while leaving old values of \c *this untouched.
/** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
*
* This method is intended for dynamic-size matrices. If you only want to change the number
* of rows and/or of columns, you can use conservativeResize(NoChange_t, Index),
* The method is intended for matrices of dynamic size. If you only want to change the number
* of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or
* conservativeResize(Index, NoChange_t).
*
* The top-left part of the resized matrix will be the same as the overlapping top-left corner
* of \c *this. In case values need to be appended to the matrix they will be uninitialized.
* Matrices are resized relative to the top-left element. In case values need to be
* appended to the matrix they will be uninitialized.
*/
EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
{
// The actual work is delegated to internal::conservative_resize_like_impl.
internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
}
/** Resizes the matrix to \a rows rows while leaving old values untouched.
*
* As opposed to conservativeResize(Index rows, Index cols), this version leaves
* the number of columns unchanged.
*
* In case the matrix is growing, new rows will be uninitialized.
*/
EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
{
// Forward to conservativeResize(Index,Index), passing the current number of columns.
conservativeResize(rows, cols());
}
/** Resizes the matrix to \a cols columns while leaving old values untouched.
*
* As opposed to conservativeResize(Index rows, Index cols), this version leaves
* the number of rows unchanged.
*
* In case the matrix is growing, new columns will be uninitialized.
*/
EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
{
// Forward to conservativeResize(Index,Index), passing the current number of rows.
conservativeResize(rows(), cols);
}
/** Resizes \c *this to a vector of length \a size while retaining old values of *this.
/** Resizes the vector to \a size while retaining old values.
*
* \only_for_vectors. This method does not work for
* partially dynamic matrices when the static dimension is anything other
@@ -322,6 +375,15 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
internal::conservative_resize_like_impl<Derived>::run(*this, size);
}
/** Resizes the matrix to \a rows x \a cols of \c other, while leaving old values untouched.
*
* The method is intended for matrices of dynamic size. If you only want to change the number
* of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or
* conservativeResize(Index, NoChange_t).
*
* Matrices are resized relative to the top-left element. In case values need to be
* appended to the matrix they will be copied from \c other.
*/
template<typename OtherDerived>
EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
{
@@ -390,6 +452,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
: m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
{
_check_template_params();
internal::check_rows_cols_for_overflow(other.derived().rows(), other.derived().cols());
Base::operator=(other.derived());
}
@@ -398,74 +461,71 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
* \a data pointers.
*
* These methods do not allow to specify strides. If you need to specify strides, you have to
* use the Map class directly.
*
* \see class Map
*/
//@{
inline static ConstMapType Map(const Scalar* data)
static inline ConstMapType Map(const Scalar* data)
{ return ConstMapType(data); }
inline static MapType Map(Scalar* data)
static inline MapType Map(Scalar* data)
{ return MapType(data); }
inline static ConstMapType Map(const Scalar* data, Index size)
static inline ConstMapType Map(const Scalar* data, Index size)
{ return ConstMapType(data, size); }
inline static MapType Map(Scalar* data, Index size)
static inline MapType Map(Scalar* data, Index size)
{ return MapType(data, size); }
inline static ConstMapType Map(const Scalar* data, Index rows, Index cols)
static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)
{ return ConstMapType(data, rows, cols); }
inline static MapType Map(Scalar* data, Index rows, Index cols)
static inline MapType Map(Scalar* data, Index rows, Index cols)
{ return MapType(data, rows, cols); }
inline static ConstAlignedMapType MapAligned(const Scalar* data)
static inline ConstAlignedMapType MapAligned(const Scalar* data)
{ return ConstAlignedMapType(data); }
inline static AlignedMapType MapAligned(Scalar* data)
static inline AlignedMapType MapAligned(Scalar* data)
{ return AlignedMapType(data); }
inline static ConstAlignedMapType MapAligned(const Scalar* data, Index size)
static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)
{ return ConstAlignedMapType(data, size); }
inline static AlignedMapType MapAligned(Scalar* data, Index size)
static inline AlignedMapType MapAligned(Scalar* data, Index size)
{ return AlignedMapType(data, size); }
inline static ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
{ return ConstAlignedMapType(data, rows, cols); }
inline static AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
{ return AlignedMapType(data, rows, cols); }
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
//@}
@@ -555,8 +615,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
// Two-argument initialization that treats its arguments as dimensions (rows, cols).
// The defaulted enable_if parameter keeps this overload only when
// Base::SizeAtCompileTime!=2.
// NOTE(review): presumably called from the two-argument constructor, with T0/T1 being
// the types of the arguments as passed by the user - confirm against the caller.
template<typename T0, typename T1>
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
{
// Compile-time check: both T0 and T1 must be integer types.
EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
bool(NumTraits<T1>::IsInteger),
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
// Run-time check: each dimension is non-negative and agrees with the compile-time
// dimension unless the latter is Dynamic.
eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
// Make sure rows*cols is checked for overflow before it is computed below.
internal::check_rows_cols_for_overflow(rows, cols);
m_storage.resize(rows*cols,rows,cols);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
}
@@ -583,7 +647,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_STRONG_INLINE static void _check_template_params()
static EIGEN_STRONG_INLINE void _check_template_params()
{
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
@@ -614,14 +678,15 @@ struct internal::conservative_resize_like_impl
if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
(!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns
{
internal::check_rows_cols_for_overflow(rows, cols);
_this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
}
else
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(rows,cols);
const Index common_rows = std::min(rows, _this.rows());
const Index common_cols = std::min(cols, _this.cols());
const Index common_rows = (std::min)(rows, _this.rows());
const Index common_cols = (std::min)(cols, _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}
@@ -654,8 +719,8 @@ struct internal::conservative_resize_like_impl
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(other);
const Index common_rows = std::min(tmp.rows(), _this.rows());
const Index common_cols = std::min(tmp.cols(), _this.cols());
const Index common_rows = (std::min)(tmp.rows(), _this.rows());
const Index common_cols = (std::min)(tmp.cols(), _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}

View File

@@ -1,8 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -26,603 +25,89 @@
#ifndef EIGEN_PRODUCT_H
#define EIGEN_PRODUCT_H
/** \class GeneralProduct
template<typename Lhs, typename Rhs> class Product;
template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
/** \class Product
* \ingroup Core_Module
*
* \brief Expression of the product of two general matrices or vectors
* \brief Expression of the product of two arbitrary matrices or vectors
*
* \param LhsNested the type used to store the left-hand side
* \param RhsNested the type used to store the right-hand side
* \param ProductMode the type of the product
* \param Lhs the type of the left-hand side expression
* \param Rhs the type of the right-hand side expression
*
* This class represents an expression of the product of two general matrices.
* We call a general matrix, a dense matrix with full storage. For instance,
* This excludes triangular, selfadjoint, and sparse matrices.
* It is the return type of the operator* between general matrices. Its template
* arguments are determined automatically by ProductReturnType. Therefore,
* GeneralProduct should never be used directly. To determine the result type of a
* function which involves a matrix product, use ProductReturnType::Type.
* This class represents an expression of the product of two arbitrary matrices.
*
* \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
class GeneralProduct;
enum {
Large = 2,
Small = 3
};
namespace internal {
template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
template<typename Lhs, typename Rhs>
struct traits<Product<Lhs, Rhs> >
{
enum { is_large = MaxSize == Dynamic ||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
value = is_large ? Large
: Size == 1 ? 1
: Small
typedef MatrixXpr XprKind;
typedef typename remove_all<Lhs>::type LhsCleaned;
typedef typename remove_all<Rhs>::type RhsCleaned;
typedef typename scalar_product_traits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind,
typename traits<RhsCleaned>::StorageKind>::ret StorageKind;
typedef typename promote_index_type<typename traits<LhsCleaned>::Index,
typename traits<RhsCleaned>::Index>::type Index;
enum {
RowsAtCompileTime = LhsCleaned::RowsAtCompileTime,
ColsAtCompileTime = RhsCleaned::ColsAtCompileTime,
MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime,
MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime,
Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0), // TODO should be no storage order
CoeffReadCost = 0 // TODO CoeffReadCost should not be part of the expression traits
};
};
// Compile-time choice of the product implementation for the expression Lhs * Rhs.
// The rows, columns and depth of the product are each classified through
// product_size_category, and the three resulting categories are mapped to a
// product kind by product_type_selector. The outcome is exposed as the nested
// enum `value`.
template<typename Lhs, typename Rhs> struct product_type
{
// Operand types after remove_all (presumably strips const/reference qualifiers -- confirm).
typedef typename remove_all<Lhs>::type _Lhs;
typedef typename remove_all<Rhs>::type _Rhs;
enum {
// Result rows are read from the lhs, result columns from the rhs.
MaxRows = _Lhs::MaxRowsAtCompileTime,
Rows = _Lhs::RowsAtCompileTime,
MaxCols = _Rhs::MaxColsAtCompileTime,
Cols = _Rhs::ColsAtCompileTime,
// Depth combines the lhs column count with the rhs row count.
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
_Rhs::MaxRowsAtCompileTime),
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
_Rhs::RowsAtCompileTime),
// NOTE(review): LargeThreshold is not referenced anywhere else in this struct.
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
};
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
// is to work around an internal compiler error with gcc 4.1 and 4.2.
private:
enum {
// Size category of each of the three product dimensions.
rows_select = product_size_category<Rows,MaxRows>::value,
cols_select = product_size_category<Cols,MaxCols>::value,
depth_select = product_size_category<Depth,MaxDepth>::value
};
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
public:
enum {
// The selected product kind, as reported by the selector.
value = selector::ret
};
#ifdef EIGEN_DEBUG_PRODUCT
// Passes the sizes, their categories and the selected kind to EIGEN_DEBUG_VAR.
// Only compiled when EIGEN_DEBUG_PRODUCT is defined.
static void debug()
{
EIGEN_DEBUG_VAR(Rows);
EIGEN_DEBUG_VAR(Cols);
EIGEN_DEBUG_VAR(Depth);
EIGEN_DEBUG_VAR(rows_select);
EIGEN_DEBUG_VAR(cols_select);
EIGEN_DEBUG_VAR(depth_select);
EIGEN_DEBUG_VAR(value);
}
#endif
};
/* Compile-time selection of the kind of product from the size categories of
* its three dimensions, given in the order (rows, cols, depth).
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
// FIXME I'm not sure the current mapping is the ideal one.
// Depth 1 defaults to an outer product, and a 1x1 result is an inner product.
// The full specialization <1,1,1> resolves the overlap between the two partial
// specializations.
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
// All dimensions are Small or 1: coefficient-based product.
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
// Depth 1 with Small/Large rows and cols: these full specializations take
// precedence over the generic <M,N,1> outer-product case above.
template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
// One of rows/cols is 1 (matrix-vector shapes): GemvProduct only when both
// remaining dimensions are Large, coefficient-based otherwise.
template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
// Remaining Small/Large combinations with at least one Large dimension: GemmProduct.
template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal
/** \class ProductReturnType
* \ingroup Core_Module
*
* \brief Helper class to get the correct and optimized returned type of operator*
*
* \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side
* \param ProductType the kind of product (its default argument is declared elsewhere;
* presumably internal::product_type<Lhs,Rhs>::value -- confirm)
*
* This class defines the typename Type representing the optimized product expression
* between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
* is the recommended way to define the result type of a function returning an expression
* which involves a matrix product. The class Product should never be
* used directly.
*
* This primary template maps to GeneralProduct<Lhs,Rhs,ProductType>.
*
* \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType>
struct ProductReturnType
{
// TODO use the nested type to reduce instantiations ????
// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
};
// Specialization for coefficient-based products: Type is a CoeffBasedProduct of
// the nested operands, flagged EvalBeforeAssigningBit | EvalBeforeNestingBit.
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
{
// Each operand is nested with the opposite operand's result dimension as the
// second argument of internal::nested (presumably the number of times each
// coefficient is read -- confirm) and its plain matrix type as the third.
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
};
// Specialization for lazy coefficient-based products: same operand nesting as
// the CoeffBasedProductMode case, but the CoeffBasedProduct is flagged with
// NestByRefBit instead of the eval-before-assigning/nesting bits.
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
};
// Two-parameter shortcut for ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>;
// it inherits the nested typedefs (LhsNested, RhsNested, Type) unchanged.
// This is a workaround for sun CC.
template<typename Lhs, typename Rhs>
struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{};
/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/
// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
// product ends up to a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
namespace internal {
// The traits of an inner product are those of a 1x1 Matrix whose scalar type is
// the ReturnType given by scalar_product_traits for the two operand scalars.
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
: traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
{};
}
// Inner (row-vector times column-vector) product.
// The object is itself a 1x1 Matrix: the constructor computes the product
// immediately and stores it in coefficient (0,0). The first base,
// internal::no_assignment_operator, is inherited privately (no access specifier).
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, InnerProduct>
: internal::no_assignment_operator,
public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
{
typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
public:
/** Computes the product of \a lhs and \a rhs.
* Compilation fails if the two operands do not share the same RealScalar type. */
GeneralProduct(const Lhs& lhs, const Rhs& rhs)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
// Sum of the coefficient-wise products of the transposed lhs with rhs.
Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
}
/** Conversion to scalar: returns the single stored coefficient. */
operator const typename Base::Scalar() const {
return Base::coeff(0,0);
}
};
/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/
namespace internal {
template<int StorageOrder> struct outer_product_selector;
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
{};
}
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
typename internal::traits<Rhs>::StorageKind>::ret>
{
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{
internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
}
};
namespace internal {
// Outer product accumulation, selected for column-major storage:
// the destination is updated one column at a time.
template<> struct outer_product_selector<ColMajor> {
// Adds alpha * prod.lhs() * prod.rhs() to dest: column j of dest receives
// (alpha * rhs.coeff(j)) * lhs.
template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index;
// FIXME make sure lhs is sequentially stored
// FIXME not very good if rhs is real and lhs complex while alpha is real too
const Index cols = dest.cols();
for (Index j=0; j<cols; ++j)
dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
}
};
// Outer product accumulation, selected for row-major storage:
// the destination is updated one row at a time.
template<> struct outer_product_selector<RowMajor> {
// Adds alpha * prod.lhs() * prod.rhs() to dest: row i of dest receives
// (alpha * lhs.coeff(i)) * rhs.
template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index;
// FIXME make sure rhs is sequentially stored
// FIXME not very good if lhs is real and rhs complex while alpha is real too
const Index rows = dest.rows();
for (Index i=0; i<rows; ++i)
dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
}
};
} // end namespace internal
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
/* According to the shape/flags of the matrix we have to distinguish 3 different cases:
* 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
* 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
* 3 - all other cases are handled using a simple loop along the outer-storage direction.
* Therefore we need a lower level meta selector.
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
*/
namespace internal {
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
{};
template<int Side, int StorageOrder, bool BlasCompatible>
struct gemv_selector;
} // end namespace internal
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemvProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
{
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
}
};
namespace internal {
// The vector is on the left => transposition.
// The product is rewritten as its transpose so that the OnTheRight selectors
// can be reused.
template<int StorageOrder, bool BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
// Forwards to gemv_selector<OnTheRight,...> with the opposite storage order,
// a transposed view of dest, and a GemvProduct built from rhs^T and lhs^T
// (operands swapped). alpha is passed through unchanged.
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
(prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
}
};
// Optional statically sized scratch buffer for the gemv selectors.
// data() returns a pointer to the buffer, or 0 when no static buffer exists.
// NOTE(review): callers appear to treat 0 as "allocate a temporary instead" -- confirm.
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
// Cond == false: no buffer is stored; data() asserts because it should never be called.
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};
// Cond == true but MaxSize is Dynamic: no static buffer is held, so data() returns 0.
template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
EIGEN_STRONG_INLINE Scalar* data() { return 0; }
};
// Cond == true with a non-Dynamic MaxSize: holds a plain_array of
// EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize) scalars and returns its storage.
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
};
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
template<typename ProductType, typename Dest>
static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::RealScalar RealScalar;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
};
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
typedef typename ProductImpl<
Lhs, Rhs,
typename internal::promote_storage_type<typename Lhs::StorageKind,
typename Rhs::StorageKind>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
ResScalar* actualDestPtr;
bool freeDestPtr = false;
if (evalToDest)
typedef typename Lhs::Nested LhsNested;
typedef typename Rhs::Nested RhsNested;
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
{
actualDestPtr = &dest.coeffRef(0);
}
else
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualDestPtr = static_dest.data())==0)
{
freeDestPtr = true;
actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
}
if(!alphaIsCompatible)
{
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
MappedDest(actualDestPtr, dest.size()) = dest;
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
}
general_matrix_vector_product
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
&actualLhs.coeffRef(0,0), actualLhs.outerStride(),
actualRhs.data(), actualRhs.innerStride(),
actualDestPtr, 1,
compatibleAlpha);
inline Index rows() const { return m_lhs.rows(); }
inline Index cols() const { return m_rhs.cols(); }
if (!evalToDest)
{
if(!alphaIsCompatible)
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
}
}
const LhsNestedCleaned& lhs() const { return m_lhs; }
const RhsNestedCleaned& rhs() const { return m_rhs; }
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
};
template<> struct gemv_selector<OnTheRight,RowMajor,true>
template<typename Lhs, typename Rhs>
class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::Index Index;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::_ActualRhsType _ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Product<Lhs, Rhs> Derived;
public:
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
};
gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
RhsScalar* actualRhsPtr;
bool freeRhsPtr = false;
if (DirectlyUseRhs)
{
actualRhsPtr = const_cast<RhsScalar*>(&actualRhs.coeffRef(0));
}
else
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualRhsPtr = static_rhs.data())==0)
{
freeRhsPtr = true;
actualRhsPtr = ei_aligned_stack_new(RhsScalar, actualRhs.size());
}
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
general_matrix_vector_product
<Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
&actualLhs.coeffRef(0,0), actualLhs.outerStride(),
actualRhsPtr, 1,
&dest.coeffRef(0,0), dest.innerStride(),
actualAlpha);
if((!DirectlyUseRhs) && freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, prod.rhs().size());
}
typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
};
// Matrix * vector with the vector on the right, column-major matrix, for the
// case where the third selector argument (BlasCompatible) is false.
template<> struct gemv_selector<OnTheRight,ColMajor,false>
{
// Adds alpha * lhs * rhs to dest as a sum of scaled lhs columns:
// for each k, dest += (alpha * rhs.coeff(k)) * lhs.col(k).
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename Dest::Index Index;
// TODO makes sure dest is sequentially stored in memory, otherwise use a temp
const Index size = prod.rhs().rows();
for(Index k=0; k<size; ++k)
dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
}
};
// Matrix * vector with the vector on the right, row-major matrix, for the
// case where the third selector argument (BlasCompatible) is false.
template<> struct gemv_selector<OnTheRight,RowMajor,false>
{
// Adds alpha * lhs * rhs to dest one coefficient at a time: coefficient i
// receives alpha times the sum of the coefficient-wise product of row i of
// lhs with the transposed rhs.
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename Dest::Index Index;
// TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
const Index rows = prod.rows();
for(Index i=0; i<rows; ++i)
dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
}
};
} // end namespace internal
/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/
/** \returns the matrix product of \c *this and \a other.
*
* The returned object is the product expression type given by
* ProductReturnType<Derived,OtherDerived>::Type, built from the two operands.
* Operand sizes known at compile time are checked with static assertions.
*
* \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
*
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
*/
template<typename Derived>
template<typename OtherDerived>
inline const typename ProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
// A note regarding the function declaration: In MSVC, this function will sometimes
// not be inlined since DenseStorage is an unwindable object for dynamic
// matrices and product types are holding a member to store the result.
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
enum {
// Valid when either inner dimension is Dynamic or when both are equal.
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|| OtherDerived::RowsAtCompileTime==Dynamic
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
};
// note to the lost user:
// * for a dot product use: v1.dot(v2)
// * for a coeff-wise product use: v1.cwiseProduct(v2)
// The three assertions only differ in the message reported for an invalid
// product: same-size vectors, same-size non-vectors, and everything else.
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
#ifdef EIGEN_DEBUG_PRODUCT
internal::product_type<Derived,OtherDerived>::debug();
#endif
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
*
* The returned product will behave like any other expression: the coefficients of the product will be
* computed one at a time as requested. This might be useful in some extremely rare cases when only
* a small and non-coherent fraction of the result's coefficients have to be computed.
*
* The returned object has type LazyProductReturnType<Derived,OtherDerived>::Type.
*
* \warning This version of the matrix product can be much much slower. So use it only if you know
* what you are doing and that you measured a true speed improvement.
*
* \sa operator*(const MatrixBase&)
*/
template<typename Derived>
template<typename OtherDerived>
const typename LazyProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
enum {
// Valid when either inner dimension is Dynamic or when both are equal.
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|| OtherDerived::RowsAtCompileTime==Dynamic
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
};
// note to the lost user:
// * for a dot product use: v1.dot(v2)
// * for a coeff-wise product use: v1.cwiseProduct(v2)
// The three assertions only differ in the message reported for an invalid
// product: same-size vectors, same-size non-vectors, and everything else.
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
#endif // EIGEN_PRODUCT_H

View File

@@ -115,10 +115,10 @@ class ProductBase : public MatrixBase<Derived>
inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); }
template<typename Dest>
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,1); }
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); }
template<typename Dest>
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,-1); }
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
@@ -179,8 +179,8 @@ class ProductBase : public MatrixBase<Derived>
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
LhsNested m_lhs;
RhsNested m_rhs;
mutable PlainObject m_result;
};
@@ -256,16 +256,16 @@ class ScaledProduct
: Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
template<typename Dest>
inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,m_alpha); }
inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); }
template<typename Dest>
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,m_alpha); }
inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); }
template<typename Dest>
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,-m_alpha); }
inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha); }
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha * m_alpha); }
const Scalar& alpha() const { return m_alpha; }

View File

@@ -95,7 +95,7 @@ struct redux_novec_unroller
typedef typename Derived::Scalar Scalar;
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func)
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
{
return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
@@ -112,7 +112,7 @@ struct redux_novec_unroller<Func, Derived, Start, 1>
typedef typename Derived::Scalar Scalar;
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&)
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
{
return mat.coeffByOuterInner(outer, inner);
}
@@ -125,7 +125,7 @@ template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
typedef typename Derived::Scalar Scalar;
EIGEN_STRONG_INLINE static Scalar run(const Derived&, const Func&) { return Scalar(); }
static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
};
/*** vectorization ***/
@@ -141,7 +141,7 @@ struct redux_vec_unroller
typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func)
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
{
return func.packetOp(
redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
@@ -162,7 +162,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
{
return mat.template packetByOuterInner<alignment>(outer, inner);
}
@@ -214,20 +214,33 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
const Index size = mat.size();
eigen_assert(size && "you are using an empty matrix");
const Index packetSize = packet_traits<Scalar>::size;
const Index alignedStart = first_aligned(mat);
const Index alignedStart = internal::first_aligned(mat);
enum {
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const Index alignedSize = ((size-alignedStart)/packetSize)*packetSize;
const Index alignedEnd = alignedStart + alignedSize;
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
const Index alignedEnd2 = alignedStart + alignedSize2;
const Index alignedEnd = alignedStart + alignedSize;
Scalar res;
if(alignedSize)
{
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
for(Index index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
packet_res = func.packetOp(packet_res, mat.template packet<alignment>(index));
res = func.predux(packet_res);
PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
{
PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
{
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
}
packet_res0 = func.packetOp(packet_res0,packet_res1);
if(alignedEnd>alignedEnd2)
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
}
res = func.predux(packet_res0);
for(Index index = 0; index < alignedStart; ++index)
res = func(res,mat.coeff(index));
@@ -296,7 +309,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
Size = Derived::SizeAtCompileTime,
VectorizedSize = (Size / PacketSize) * PacketSize
};
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
{
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));

View File

@@ -122,9 +122,13 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
return m_matrix.template packet<LoadMode>(actual_row, actual_col);
}
/** \returns a const reference to the nested expression stored in \c m_matrix,
  * with the reference/const qualifiers of \c MatrixType::Nested stripped by
  * \c internal::remove_all. */
const typename internal::remove_all<typename MatrixType::Nested>::type& nestedExpression() const
{
return m_matrix;
}
protected:
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
};

View File

@@ -71,9 +71,9 @@ template<typename Derived> class ReturnByValue
template<typename Dest>
inline void evalTo(Dest& dst) const
{ static_cast<const Derived* const>(this)->evalTo(dst); }
inline Index rows() const { return static_cast<const Derived* const>(this)->rows(); }
inline Index cols() const { return static_cast<const Derived* const>(this)->cols(); }
{ static_cast<const Derived*>(this)->evalTo(dst); }
inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT

View File

@@ -183,8 +183,14 @@ template<typename MatrixType, int Direction> class Reverse
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
}
/** \returns a const reference to the nested expression stored in \c m_matrix,
  * with the reference/const qualifiers of \c MatrixType::Nested stripped by
  * \c internal::remove_all. */
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
return m_matrix;
}
protected:
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
};
/** \returns an expression of the reverse of *this.

View File

@@ -101,10 +101,25 @@ class Select : internal::no_assignment_operator,
return m_else.coeff(i);
}
/** \returns a const reference to the condition expression held in \c m_condition */
const ConditionMatrixType& conditionMatrix() const
{
return m_condition;
}
/** \returns a const reference to the "then" expression held in \c m_then */
const ThenMatrixType& thenMatrix() const
{
return m_then;
}
/** \returns a const reference to the "else" expression held in \c m_else
  * (the expression whose coefficients are read via \c m_else.coeff()). */
const ElseMatrixType& elseMatrix() const
{
return m_else;
}
protected:
const typename ConditionMatrixType::Nested m_condition;
const typename ThenMatrixType::Nested m_then;
const typename ElseMatrixType::Nested m_else;
typename ConditionMatrixType::Nested m_condition;
typename ThenMatrixType::Nested m_then;
typename ElseMatrixType::Nested m_else;
};

View File

@@ -32,13 +32,13 @@
* \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
*
* \param MatrixType the type of the dense matrix storing the coefficients
* \param TriangularPart can be either \c Lower or \c Upper
* \param TriangularPart can be either \c #Lower or \c #Upper
*
* This class is an expression of a selfadjoint matrix from a triangular part of a matrix
* with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()
* and most of the time this is the only way that it is used.
*
* \sa class TriangularBase, MatrixBase::selfAdjointView()
* \sa class TriangularBase, MatrixBase::selfadjointView()
*/
namespace internal {
@@ -82,7 +82,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
};
typedef typename MatrixType::PlainObject PlainObject;
inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{}
inline Index rows() const { return m_matrix.rows(); }
@@ -199,7 +199,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
#endif
protected:
const MatrixTypeNested m_matrix;
MatrixTypeNested m_matrix;
};
@@ -222,7 +222,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -236,7 +236,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
{
inline static void run(Derived1 &, const Derived2 &) {}
static inline void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
@@ -247,7 +247,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -261,14 +261,14 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
{
inline static void run(Derived1 &, const Derived2 &) {}
static inline void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
@@ -285,7 +285,7 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dyn
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
{
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
typedef typename Derived1::Index Index;
for(Index i = 0; i < dst.rows(); ++i)

View File

@@ -163,6 +163,16 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
return Base::operator=(rhs);
}
/** \returns the left-hand side expression referenced by \c m_matrix.
  * The result is a non-const reference even though the method is const,
  * because \c m_matrix is itself a reference member (\c Lhs&). */
Lhs& expression() const
{
return m_matrix;
}
/** \returns a const reference to the binary functor referenced by \c m_functor */
const BinaryOp& functor() const
{
return m_functor;
}
protected:
Lhs& m_matrix;
const BinaryOp& m_functor;

View File

@@ -74,26 +74,19 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
// FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1
bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
RhsScalar* actualRhs;
if(useRhsDirectly)
{
actualRhs = &rhs.coeffRef(0);
}
else
{
actualRhs = ei_aligned_stack_new(RhsScalar,rhs.size());
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
(useRhsDirectly ? rhs.data() : 0));
if(!useRhsDirectly)
MappedRhs(actualRhs,rhs.size()) = rhs;
}
triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate,
(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);
if(!useRhsDirectly)
{
rhs = MappedRhs(actualRhs, rhs.size());
ei_aligned_stack_delete(RhsScalar, actualRhs, rhs.size());
}
}
};
@@ -107,7 +100,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
static void run(const Lhs& lhs, Rhs& rhs)
{
const ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
@@ -184,10 +177,8 @@ template<int Side, typename OtherDerived>
void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
{
OtherDerived& other = _other.const_cast_derived();
eigen_assert(cols() == rows());
eigen_assert( (Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols()) );
eigen_assert(!(Mode & ZeroDiag));
eigen_assert(Mode & (Upper|Lower));
eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) );
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime };
typedef typename internal::conditional<copy,
@@ -262,7 +253,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
protected:
const TriangularType& m_triangularMatrix;
const typename Rhs::Nested m_rhs;
typename Rhs::Nested m_rhs;
};
} // namespace internal

View File

@@ -56,10 +56,11 @@ template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const
{
using std::min;
const Index blockSize = 4096;
RealScalar scale = 0;
RealScalar invScale = 1;
RealScalar ssq = 0; // sum of square
RealScalar scale(0);
RealScalar invScale(1);
RealScalar ssq(0); // sum of square
enum {
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
};
@@ -68,7 +69,7 @@ MatrixBase<Derived>::stableNorm() const
if (bi>0)
internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
for (; bi<n; bi+=blockSize)
internal::stable_norm_kernel(this->segment(bi,std::min(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
return scale * internal::sqrt(ssq);
}
@@ -85,6 +86,9 @@ template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::blueNorm() const
{
using std::pow;
using std::min;
using std::max;
static Index nmax = -1;
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
if(nmax <= 0)
@@ -99,25 +103,25 @@ MatrixBase<Derived>::blueNorm() const
// For portability, the PORT subprograms "i1mach" and "r1mach"
// are used. For any specific computer, each of the assignment
// statements can be replaced
nbig = std::numeric_limits<Index>::max(); // largest integer
nbig = (std::numeric_limits<Index>::max)(); // largest integer
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
rbig = std::numeric_limits<RealScalar>::max(); // largest floating-point number
rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
iexp = -((1-iemin)/2);
b1 = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
iexp = (iemax + 1 - it)/2;
b2 = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
iexp = (2-iemin)/2;
s1m = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
iexp = - ((iemax+it)/2);
s2m = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
overfl = rbig*s2m; // overflow boundary for abig
eps = RealScalar(std::pow(double(ibeta), 1-it));
eps = RealScalar(pow(double(ibeta), 1-it));
relerr = internal::sqrt(eps); // tolerance for neglecting asml
abig = RealScalar(1.0/eps - 1.0);
if (RealScalar(nbig)>abig) nmax = int(abig); // largest safe n
@@ -163,8 +167,8 @@ MatrixBase<Derived>::blueNorm() const
}
else
return internal::sqrt(amed);
asml = std::min(abig, amed);
abig = std::max(abig, amed);
asml = (min)(abig, amed);
abig = (max)(abig, amed);
if(asml <= abig*relerr)
return abig;
else

View File

@@ -52,6 +52,15 @@ template<typename ExpressionType> class SwapWrapper
inline Index cols() const { return m_expression.cols(); }
inline Index outerStride() const { return m_expression.outerStride(); }
inline Index innerStride() const { return m_expression.innerStride(); }
// Scalar type used by the non-const data() accessor: plain Scalar when
// ExpressionType is an lvalue (per internal::is_lvalue), const Scalar otherwise.
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
/** \returns the pointer returned by the wrapped expression's data(); the pointee
  * is const-qualified when the wrapped expression is not an lvalue. */
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
/** \returns the pointer returned by the wrapped expression's data(), as a pointer to const. */
inline const Scalar* data() const { return m_expression.data(); }
inline Scalar& coeffRef(Index row, Index col)
{
@@ -119,6 +128,8 @@ template<typename ExpressionType> class SwapWrapper
_other.template writePacket<LoadMode>(index, tmp);
}
/** \returns the wrapped expression; non-const even from a const method because
  * \c m_expression is a reference member (\c ExpressionType&). */
ExpressionType& expression() const { return m_expression; }
protected:
ExpressionType& m_expression;
};

View File

@@ -91,7 +91,7 @@ template<typename MatrixType> class Transpose
nestedExpression() { return m_matrix.const_cast_derived(); }
protected:
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
};
namespace internal {
@@ -152,12 +152,12 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
return derived().nestedExpression().coeffRef(index);
}
inline const CoeffReturnType coeff(Index row, Index col) const
inline CoeffReturnType coeff(Index row, Index col) const
{
return derived().nestedExpression().coeff(col, row);
}
inline const CoeffReturnType coeff(Index index) const
inline CoeffReturnType coeff(Index index) const
{
return derived().nestedExpression().coeff(index);
}
@@ -350,15 +350,14 @@ struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
template<bool DestIsTransposed, typename OtherDerived>
struct check_transpose_aliasing_compile_time_selector
{
enum { ret = blas_traits<OtherDerived>::IsTransposed != DestIsTransposed
};
enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
};
template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
{
enum { ret = blas_traits<DerivedA>::IsTransposed != DestIsTransposed
|| blas_traits<DerivedB>::IsTransposed != DestIsTransposed
enum { ret = bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
|| bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
};
};
@@ -367,7 +366,7 @@ struct check_transpose_aliasing_run_time_selector
{
static bool run(const Scalar* dest, const OtherDerived& src)
{
return (blas_traits<OtherDerived>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src));
return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src));
}
};

View File

@@ -404,7 +404,7 @@ struct transposition_matrix_product_retval
protected:
const TranspositionType& m_transpositions;
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
};
} // end namespace internal

View File

@@ -111,6 +111,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
EIGEN_ONLY_USED_FOR_DEBUG(col);
eigen_assert(col>=0 && col<cols() && row>=0 && row<rows());
const int mode = int(Mode) & ~SelfAdjoint;
EIGEN_ONLY_USED_FOR_DEBUG(mode);
eigen_assert((mode==Upper && col>=row)
|| (mode==Lower && col<=row)
|| ((mode==StrictlyUpper || mode==UnitUpper) && col>row)
@@ -134,13 +135,13 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
* \brief Base class for triangular part in a matrix
*
* \param MatrixType the type of the object in which we are taking the triangular part
* \param Mode the kind of triangular matrix expression to construct. Can be Upper,
* Lower, UpperSelfadjoint, or LowerSelfadjoint. This is in fact a bit field;
* it must have either Upper or Lower, and additionnaly it may have either
* UnitDiag or Selfadjoint.
* \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
* #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
* This is in fact a bit field; it must have either #Upper or #Lower,
* and additionally it may have #UnitDiag or #ZeroDiag or neither.
*
* This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
* matrices one should speak ok "trapezoid" parts. This class is the return type
* matrices one should speak of "trapezoid" parts. This class is the return type
* of MatrixBase::triangularView() and most of the time this is the only way it is used.
*
* \sa MatrixBase::triangularView()
@@ -272,11 +273,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
{ return m_matrix.conjugate(); }
/** \sa MatrixBase::adjoint() */
inline TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint()
{ return m_matrix.adjoint(); }
/** \sa MatrixBase::adjoint() const */
inline const TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
{ return m_matrix.adjoint(); }
/** \sa MatrixBase::transpose() */
@@ -287,11 +285,13 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
}
/** \sa MatrixBase::transpose() const */
inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
{ return m_matrix.transpose(); }
{
return m_matrix.transpose();
}
/** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived>
TriangularProduct<Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime>
TriangularProduct<Mode,true,MatrixType,false,OtherDerived, OtherDerived::IsVectorAtCompileTime>
operator*(const MatrixBase<OtherDerived>& rhs) const
{
return TriangularProduct
@@ -374,7 +374,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
template<typename OtherDerived>
void swap(MatrixBase<OtherDerived> const & other)
{
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
}
Scalar determinant() const
@@ -432,7 +433,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
const MatrixTypeNested m_matrix;
MatrixTypeNested m_matrix;
};
/***************************************************************************
@@ -448,8 +449,10 @@ struct triangular_assignment_selector
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
typedef typename Derived1::Scalar Scalar;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -466,9 +469,9 @@ struct triangular_assignment_selector
else if(ClearOpposite)
{
if (Mode&UnitDiag && row==col)
dst.coeffRef(row, col) = 1;
dst.coeffRef(row, col) = Scalar(1);
else
dst.coeffRef(row, col) = 0;
dst.coeffRef(row, col) = Scalar(0);
}
}
};
@@ -477,23 +480,24 @@ struct triangular_assignment_selector
template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
{
inline static void run(Derived1 &, const Derived2 &) {}
static inline void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
typedef typename Derived1::Scalar Scalar;
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = std::min(j, dst.rows()-1);
Index maxi = (std::min)(j, dst.rows()-1);
for(Index i = 0; i <= maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
for(Index i = maxi+1; i < dst.rows(); ++i)
dst.coeffRef(i, j) = 0;
dst.coeffRef(i, j) = Scalar(0);
}
}
};
@@ -502,16 +506,16 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
for(Index i = j; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
Index maxi = std::min(j, dst.rows());
Index maxi = (std::min)(j, dst.rows());
if (ClearOpposite)
for(Index i = 0; i < maxi; ++i)
dst.coeffRef(i, j) = 0;
dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
}
}
};
@@ -520,11 +524,11 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = std::min(j, dst.rows());
Index maxi = (std::min)(j, dst.rows());
for(Index i = 0; i < maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
@@ -538,16 +542,16 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
for(Index i = j+1; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
Index maxi = std::min(j, dst.rows()-1);
Index maxi = (std::min)(j, dst.rows()-1);
if (ClearOpposite)
for(Index i = 0; i <= maxi; ++i)
dst.coeffRef(i, j) = 0;
dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
}
}
};
@@ -556,11 +560,11 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = std::min(j, dst.rows());
Index maxi = (std::min)(j, dst.rows());
for(Index i = 0; i < maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
@@ -576,11 +580,11 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = std::min(j, dst.rows());
Index maxi = (std::min)(j, dst.rows());
for(Index i = maxi+1; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
@@ -756,8 +760,8 @@ typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Deriv
/**
* \returns an expression of a triangular view extracted from the current matrix
*
* The parameter \a Mode can have the following values: \c Upper, \c StrictlyUpper, \c UnitUpper,
* \c Lower, \c StrictlyLower, \c UnitLower.
* The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
*
* Example: \include MatrixBase_extract.cpp
* Output: \verbinclude MatrixBase_extract.out
@@ -792,7 +796,7 @@ bool MatrixBase<Derived>::isUpperTriangular(RealScalar prec) const
RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
{
Index maxi = std::min(j, rows()-1);
Index maxi = (std::min)(j, rows()-1);
for(Index i = 0; i <= maxi; ++i)
{
RealScalar absValue = internal::abs(coeff(i,j));
@@ -824,7 +828,7 @@ bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const
RealScalar threshold = maxAbsOnLowerPart * prec;
for(Index j = 1; j < cols(); ++j)
{
Index maxi = std::min(j, rows()-1);
Index maxi = (std::min)(j, rows()-1);
for(Index i = 0; i < maxi; ++i)
if(internal::abs(coeff(i, j)) > threshold) return false;
}

View File

@@ -31,9 +31,9 @@
*
* \brief Generic expression of a partially reduxed matrix
*
* \param MatrixType the type of the matrix we are applying the redux operation
* \param MemberOp type of the member functor
* \param Direction indicates the direction of the redux (Vertical or Horizontal)
* \tparam MatrixType the type of the matrix we are applying the redux operation
* \tparam MemberOp type of the member functor
* \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
*
* This class represents an expression of a partial redux operator of a matrix.
* It is the return type of some VectorwiseOp functions,
@@ -110,7 +110,7 @@ class PartialReduxExpr : internal::no_assignment_operator,
}
protected:
const MatrixTypeNested m_matrix;
MatrixTypeNested m_matrix;
const MemberOp m_functor;
};
@@ -164,7 +164,7 @@ struct member_redux {
* \brief Pseudo expression providing partial reduction operations
*
* \param ExpressionType the type of the object on which to do partial reductions
* \param Direction indicates the direction of the redux (Vertical or Horizontal)
* \param Direction indicates the direction of the redux (#Vertical or #Horizontal)
*
* This class represents a pseudo expression with partial reduction features.
* It is the return type of DenseBase::colwise() and DenseBase::rowwise()
@@ -237,7 +237,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
typename ExtendedType<OtherDerived>::Type
extendedTo(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1),
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1),
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
return typename ExtendedType<OtherDerived>::Type
(other.derived(),
Direction==Vertical ? 1 : m_matrix.rows(),
@@ -418,10 +421,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
//eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
for(Index j=0; j<subVectors(); ++j)
subVector(j) = other;
return const_cast<ExpressionType&>(m_matrix);
return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
}
/** Adds the vector \a other to each subvector of \c *this */
@@ -429,9 +431,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
for(Index j=0; j<subVectors(); ++j)
subVector(j) += other.derived();
return const_cast<ExpressionType&>(m_matrix);
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
}
/** Subtracts the vector \a other from each subvector of \c *this */
@@ -439,8 +440,29 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
for(Index j=0; j<subVectors(); ++j)
subVector(j) -= other.derived();
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
}
/** Multiplies each subvector of \c *this by the vector \a other.
  *
  * \param other the vector operand; it is passed through extendedTo() before
  *              being combined with the nested expression.
  * \returns a non-const reference to the nested expression \c m_matrix.
  */
template<typename OtherDerived>
ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
{
// Compile-time checks; judging by the macro names: \a other must be a vector,
// ExpressionType must be an array expression, and both operands must be of
// the same expression kind -- TODO confirm against the macro definitions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
// Apply the product in a single compound assignment on the whole expression.
m_matrix *= extendedTo(other.derived());
// const_cast strips any constness carried by the stored m_matrix member.
return const_cast<ExpressionType&>(m_matrix);
}
/** Divides each subvector of \c *this by the vector \a other.
  *
  * \param other the vector operand; it is passed through extendedTo() before
  *              being combined with the nested expression.
  * \returns a non-const reference to the nested expression \c m_matrix.
  */
template<typename OtherDerived>
ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
{
// Compile-time checks; judging by the macro names: \a other must be a vector,
// ExpressionType must be an array expression, and both operands must be of
// the same expression kind -- TODO confirm against the macro definitions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
// Apply the quotient in a single compound assignment on the whole expression.
m_matrix /= extendedTo(other.derived());
// const_cast strips any constness carried by the stored m_matrix member.
return const_cast<ExpressionType&>(m_matrix);
}
@@ -451,7 +473,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
const typename ExtendedType<OtherDerived>::Type>
operator+(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix + extendedTo(other.derived());
}
@@ -462,10 +485,39 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
const typename ExtendedType<OtherDerived>::Type>
operator-(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix - extendedTo(other.derived());
}
/** Returns the expression where each subvector is the product of the vector \a other
* by the corresponding subvector of \c *this
*
* \param other the vector operand; it is passed through extendedTo() before
*              being combined with the nested expression.
* \returns a CwiseBinaryOp built with internal::scalar_product_op; nothing is
*          assigned to \c m_matrix by this operator. */
template<typename OtherDerived> EIGEN_STRONG_INLINE
CwiseBinaryOp<internal::scalar_product_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
operator*(const DenseBase<OtherDerived>& other) const
{
// Compile-time checks; judging by the macro names: \a other must be a vector,
// ExpressionType must be an array expression, and both operands must be of
// the same expression kind -- TODO confirm against the macro definitions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix * extendedTo(other.derived());
}
/** Returns the expression where each subvector is the quotient of the corresponding
* subvector of \c *this by the vector \a other
*
* \param other the vector operand; it is passed through extendedTo() before
*              being combined with the nested expression.
* \returns a CwiseBinaryOp built with internal::scalar_quotient_op; nothing is
*          assigned to \c m_matrix by this operator. */
template<typename OtherDerived>
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
operator/(const DenseBase<OtherDerived>& other) const
{
// Compile-time checks; judging by the macro names: \a other must be a vector,
// ExpressionType must be an array expression, and both operands must be of
// the same expression kind -- TODO confirm against the macro definitions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix / extendedTo(other.derived());
}
/////////// Geometry module ///////////
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
@@ -509,7 +561,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Example: \include MatrixBase_colwise.cpp
* Output: \verbinclude MatrixBase_colwise.out
*
* \sa rowwise(), class VectorwiseOp
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline const typename DenseBase<Derived>::ConstColwiseReturnType
@@ -520,7 +572,7 @@ DenseBase<Derived>::colwise() const
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
*
* \sa rowwise(), class VectorwiseOp
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline typename DenseBase<Derived>::ColwiseReturnType
@@ -534,7 +586,7 @@ DenseBase<Derived>::colwise()
* Example: \include MatrixBase_rowwise.cpp
* Output: \verbinclude MatrixBase_rowwise.out
*
* \sa colwise(), class VectorwiseOp
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline const typename DenseBase<Derived>::ConstRowwiseReturnType
@@ -545,7 +597,7 @@ DenseBase<Derived>::rowwise() const
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
*
* \sa colwise(), class VectorwiseOp
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline typename DenseBase<Derived>::RowwiseReturnType

View File

@@ -35,7 +35,7 @@ struct visitor_impl
row = (UnrollCount-1) % Derived::RowsAtCompileTime
};
inline static void run(const Derived &mat, Visitor& visitor)
static inline void run(const Derived &mat, Visitor& visitor)
{
visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
visitor(mat.coeff(row, col), row, col);
@@ -45,7 +45,7 @@ struct visitor_impl
template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, 1>
{
inline static void run(const Derived &mat, Visitor& visitor)
static inline void run(const Derived &mat, Visitor& visitor)
{
return visitor.init(mat.coeff(0, 0), 0, 0);
}
@@ -55,7 +55,7 @@ template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, Dynamic>
{
typedef typename Derived::Index Index;
inline static void run(const Derived& mat, Visitor& visitor)
static inline void run(const Derived& mat, Visitor& visitor)
{
visitor.init(mat.coeff(0,0), 0, 0);
for(Index i = 1; i < mat.rows(); ++i)

View File

@@ -48,6 +48,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
typedef Packet2cf type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
HasAdd = 1,
@@ -69,19 +70,17 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
{
Packet2cf res;
/* On AltiVec we cannot load 64-bit registers, so we have to take care of alignment */
if ((ptrdiff_t)&from % 16 == 0) {
res.v = pload((const float *)&from);
res.v = vec_perm(res.v, res.v, p16uc_PSET_HI);
} else {
res.v = ploadu((const float *)&from);
res.v = vec_perm(res.v, res.v, p16uc_PSET_LO);
}
if((ptrdiff_t(&from) % 16) == 0)
res.v = pload<Packet4f>((const float *)&from);
else
res.v = ploadu<Packet4f>((const float *)&from);
res.v = vec_perm(res.v, res.v, p16uc_PSET_HI);
return res;
}
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(psub<Packet4f>(p4f_ZERO, a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); }
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
@@ -108,8 +107,13 @@ template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a,
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); }
template<> EIGEN_STRONG_INLINE Packet2cf pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
// Duplicating load for complex packets: reads the single complex value pointed to by
// `from` and hands it to pset1<Packet2cf>(), which builds the returned packet.
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
{
  const std::complex<float>& value = *from;
  return pset1<Packet2cf>(value);
}
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
@@ -136,7 +140,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe
Packet4f b;
b = (Packet4f) vec_sld(a.v, a.v, 8);
b = padd(a.v, b);
return pfirst(Packet2cf(sum));
return pfirst(Packet2cf(b));
}
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
@@ -164,7 +168,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
{
if (Offset==1)
{
@@ -180,7 +184,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return pmul(a, pconj(b));
return internal::pmul(a, pconj(b));
}
};
@@ -191,7 +195,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return pmul(pconj(a), b);
return internal::pmul(pconj(a), b);
}
};
@@ -202,7 +206,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return pconj(pmul(a, b));
return pconj(internal::pmul(a, b));
}
};
@@ -214,6 +218,11 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX_REV))));
}
// Permutes the lanes of x with the p16uc_COMPLEX_REV mask and wraps the result in a
// Packet2cf.
// NOTE(review): the mask name suggests the real and imaginary parts of each complex
// are swapped -- confirm against the definition of p16uc_COMPLEX_REV.
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
{
  Packet4f flipped = vec_perm(x.v, x.v, p16uc_COMPLEX_REV);
  return Packet2cf(flipped);
}
} // end namespace internal
#endif // EIGEN_COMPLEX_ALTIVEC_H

View File

@@ -73,6 +73,7 @@ static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 };
static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
@@ -292,6 +293,21 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data
}
// Loads a packet starting at `from` and returns {from[0], from[0], from[1], from[1]}:
// the p16uc_DUPLICATE permutation repeats each of the first two 32-bit lanes.
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
  Packet4f p;
  // Check the alignment of the address being read. `from` is already a pointer, so
  // `&from` would test the address of the parameter itself rather than the data.
  if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
  else                            p = ploadu<Packet4f>(from);
  return vec_perm(p, p, p16uc_DUPLICATE);
}
// Loads a packet starting at `from` and returns {from[0], from[0], from[1], from[1]}:
// the p16uc_DUPLICATE permutation repeats each of the first two 32-bit lanes.
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
  Packet4i p;
  // Check the alignment of the address being read. `from` is already a pointer, so
  // `&from` would test the address of the parameter itself rather than the data.
  if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
  else                            p = ploadu<Packet4i>(from);
  return vec_perm(p, p, p16uc_DUPLICATE);
}
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
@@ -471,7 +487,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
{
if (Offset!=0)
first = vec_sld(first, second, Offset*4);
@@ -481,7 +497,7 @@ struct palign_impl<Offset,Packet4f>
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
{
if (Offset!=0)
first = vec_sld(first, second, Offset*4);

View File

@@ -27,8 +27,8 @@
namespace internal {
static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };
static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
//---------- float ----------
struct Packet2cf
@@ -43,6 +43,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
typedef Packet2cf type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
HasAdd = 1,
@@ -120,6 +121,8 @@ template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a,
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
@@ -144,7 +147,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
return Packet2cf(a_r128);
}
EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
{
return Packet2cf(vrev64q_f32(a.v));
}
@@ -220,7 +223,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return pmul(a, pconj(b));
return internal::pmul(a, pconj(b));
}
};
@@ -231,7 +234,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return pmul(pconj(a), b);
return internal::pmul(pconj(a), b);
}
};
@@ -242,7 +245,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return pconj(pmul(a, b));
return pconj(internal::pmul(a, b));
}
};

View File

@@ -41,7 +41,7 @@ namespace internal {
typedef float32x4_t Packet4f;
typedef int32x4_t Packet4i;
typedef uint32x4_t Packet4ui;
typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f p4f_##NAME = pset1<Packet4f>(X)
@@ -52,6 +52,16 @@ typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// EIGEN_INIT_NEON_PACKET2 / EIGEN_INIT_NEON_PACKET4 expand to the brace initializer
// used to build a 2- or 4-lane NEON packet from individual lane values. Two spellings
// are needed because the required brace nesting depends on the compiler.
#if defined(__llvm__) && !defined(__clang__)
//Special treatment for Apple's llvm-gcc, its NEON packet types are unions
// (hence the extra level of braces around the lane values)
#define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
#else
//Default initializer for packets
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
#endif
#ifndef __pld
#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
#endif
@@ -84,8 +94,8 @@ template<> struct packet_traits<int> : default_packet_traits
};
};
#if (defined __GNUC__) && (!(EIGEN_GNUC_AT_LEAST(4,4)))
// workaround gcc 4.2 and 4.3 compilation issue
#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
// workaround gcc 4.2, 4.3 and 4.4 compilation issue
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
@@ -100,12 +110,12 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
{
Packet4f countdown = { 3, 2, 1, 0 };
Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
return vaddq_f32(pset1<Packet4f>(a), countdown);
}
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
{
Packet4i countdown = { 3, 2, 1, 0 };
Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
return vaddq_s32(pset1<Packet4i>(a), countdown);
}
@@ -191,14 +201,14 @@ template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
float32x2_t lo, hi;
lo = vdup_n_f32(*from);
hi = vdup_n_f32(*from);
hi = vdup_n_f32(*(from+1));
return vcombine_f32(lo, hi);
}
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
int32x2_t lo, hi;
lo = vdup_n_s32(*from);
hi = vdup_n_s32(*from);
hi = vdup_n_s32(*(from+1));
return vcombine_s32(lo, hi);
}
@@ -395,25 +405,29 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
return s[0];
}
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
if (Offset!=0)
first = vextq_f32(first, second, Offset);
}
};
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
#define PALIGN_NEON(Offset,Type,Command) \
template<>\
struct palign_impl<Offset,Type>\
{\
EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
{\
if (Offset!=0)\
first = Command(first, second, Offset);\
}\
};\
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
if (Offset!=0)
first = vextq_s32(first, second, Offset);
}
};
PALIGN_NEON(0,Packet4f,vextq_f32)
PALIGN_NEON(1,Packet4f,vextq_f32)
PALIGN_NEON(2,Packet4f,vextq_f32)
PALIGN_NEON(3,Packet4f,vextq_f32)
PALIGN_NEON(0,Packet4i,vextq_s32)
PALIGN_NEON(1,Packet4i,vextq_s32)
PALIGN_NEON(2,Packet4i,vextq_s32)
PALIGN_NEON(3,Packet4i,vextq_s32)
#undef PALIGN_NEON
} // end namespace internal

View File

@@ -97,23 +97,30 @@ template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a,
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&real_ref(*from))); }
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&real_ref(*from))); }
// Broadcasts the complex scalar `from` into both complex slots of a Packet2cf.
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
Packet2cf res;
#if EIGEN_GNUC_AT_MOST(4,2)
// workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
// (a zeroed register is passed as the upper-half source instead of reading res.v)
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
#else
// Load the 64 bits of `from` into the lower half. The upper half comes from the
// not-yet-initialized res.v; it is discarded by the _mm_movelh_ps below.
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
#endif
// Copy the lower half into the upper half so that both slots hold `from`.
return Packet2cf(_mm_movelh_ps(res.v,res.v));
}
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
Packet2cf res;
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
return Packet2cf(_mm_movelh_ps(res.v,res.v));
}
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
#if (defined __GNUC__) && (__GNUC__==4) && (__GNUC_MINOR__==2) && (__GNUC_PATCHLEVEL__<=3)
// workaround gcc 4.2.1 ICE (mac's gcc version) - I'm not sure how the 4.2.2 and 4.2.3 deal with it, but 4.2.4 works well.
// this is not performance wise ideal, but who cares...
#if EIGEN_GNUC_AT_MOST(4,3)
// Workaround gcc 4.2 ICE - this is not performance wise ideal, but who cares...
// This workaround also fixes invalid code generation with gcc 4.3
EIGEN_ALIGN16 std::complex<float> res[2];
_mm_store_ps((float*)res, a.v);
return res[0];
@@ -144,7 +151,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
{
if (Offset==1)
{
@@ -308,6 +315,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<do
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
// FIXME force unaligned store, this is a temporary fix
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
@@ -341,7 +350,7 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
template<int Offset>
struct palign_impl<Offset,Packet1cd>
{
EIGEN_STRONG_INLINE static void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
{
// FIXME is it sure we never have to align a Packet1cd?
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...

View File

@@ -110,23 +110,18 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
#ifdef __GNUC__
// Sometimes GCC implements _mm_set1_p* using multiple moves,
// that is inefficient :( (e.g., see gemm_pack_rhs)
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
Packet4f res = _mm_set_ss(from);
return vec4f_swizzle1(res,0,0,0,0);
}
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
// NOTE the SSE3 intrinsic _mm_loaddup_pd is never faster but sometimes much slower
Packet2d res = _mm_set_sd(from);
return vec2d_swizzle1(res, 0, 0);
}
#if defined(_MSC_VER) && (_MSC_VER==1500)
// Workaround MSVC 9 internal compiler error.
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
#endif
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
@@ -245,35 +240,58 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
// a correct instruction dependency.
// TODO: do the same for MSVC (ICC is compatible)
// NOTE: with the code below, MSVC's compiler crashes!
#if defined(__GNUC__) && defined(__i386__)
// bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#elif defined(__clang__)
// bug 201: Segfaults in __mm_loadh_pd with clang 2.8
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#else
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
#endif
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_ps(from);
#else
__m128d res;
res = _mm_load_sd((const double*)(from)) ;
res = _mm_loadh_pd(res, (const double*)(from+2)) ;
return _mm_castpd_ps(res);
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_pd(from);
#else
__m128d res;
res = _mm_load_sd(from) ;
res = _mm_loadh_pd(res,from+1);
return res;
#endif
}
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from));
#else
__m128d res;
res = _mm_load_sd((const double*)(from)) ;
res = _mm_loadh_pd(res, (const double*)(from+2)) ;
return _mm_castpd_si128(res);
#endif
}
#endif
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
{ return pset1<Packet2d>(from[0]); }
@@ -293,8 +311,21 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
_mm_storel_pd((to), from);
_mm_storeh_pd((to+1), from);
}
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castsi128_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castsi128_pd(from)); }
// Stores the scalar `a` into all four lanes at `to` (aligned store through pstore).
// The broadcast is spelled out as "scalar into lane 0, then swizzle" because some
// compilers might otherwise emit multiple moves instead of using a vector path.
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
{
  Packet4f lane0 = _mm_set_ss(a);
  Packet4f broadcast = vec4f_swizzle1(lane0,0,0,0,0);
  pstore(to, broadcast);
}
// Stores the scalar `a` into both lanes at `to` (aligned store through pstore).
// The broadcast is spelled out as "scalar into lane 0, then swizzle" because some
// compilers might otherwise emit multiple moves instead of using a vector path.
template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
{
  Packet2d lane0 = _mm_set_sd(a);
  Packet2d broadcast = vec2d_swizzle1(lane0,0,0);
  pstore(to, broadcast);
}
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
@@ -519,7 +550,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
{
if (Offset!=0)
first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
@@ -529,7 +560,7 @@ struct palign_impl<Offset,Packet4f>
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
{
if (Offset!=0)
first = _mm_alignr_epi8(second,first, Offset*4);
@@ -539,7 +570,7 @@ struct palign_impl<Offset,Packet4i>
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
{
if (Offset==1)
first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
@@ -550,7 +581,7 @@ struct palign_impl<Offset,Packet2d>
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
{
if (Offset==1)
{
@@ -573,7 +604,7 @@ struct palign_impl<Offset,Packet4f>
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
{
if (Offset==1)
{
@@ -596,7 +627,7 @@ struct palign_impl<Offset,Packet4i>
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
{
if (Offset==1)
{

View File

@@ -224,8 +224,8 @@ class CoeffBasedProduct
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
mutable PlainObject m_result;
};
@@ -252,7 +252,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
@@ -263,7 +263,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
@@ -273,7 +273,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
{
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
@@ -291,7 +291,7 @@ struct product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
@@ -302,7 +302,7 @@ template<typename Lhs, typename Rhs, typename Packet>
struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
}
@@ -314,7 +314,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
Packet pres;
product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
@@ -327,7 +327,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R
struct product_coeff_vectorized_dyn_selector
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -339,7 +339,7 @@ template<typename Lhs, typename Rhs, int RhsCols>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -349,7 +349,7 @@ template<typename Lhs, typename Rhs, int LhsRows>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
}
@@ -359,7 +359,7 @@ template<typename Lhs, typename Rhs>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs).sum();
}
@@ -369,7 +369,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
}
@@ -383,7 +383,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
@@ -394,7 +394,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
@@ -405,7 +405,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
@@ -415,7 +415,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
@@ -425,7 +425,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
@@ -438,7 +438,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));

View File

@@ -30,19 +30,18 @@ namespace internal {
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
class gebp_traits;
/** \internal \returns b if a<=0, and returns a otherwise. */
inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
{
return a<=0 ? b : a;
}
/** \internal */
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
{
static std::ptrdiff_t m_l1CacheSize = 0;
static std::ptrdiff_t m_l2CacheSize = 0;
if(m_l1CacheSize==0)
{
m_l1CacheSize = queryL1CacheSize();
m_l2CacheSize = queryTopLevelCacheSize();
if(m_l1CacheSize<=0) m_l1CacheSize = 8 * 1024;
if(m_l2CacheSize<=0) m_l2CacheSize = 1 * 1024 * 1024;
}
static std::ptrdiff_t m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
static std::ptrdiff_t m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
if(action==SetAction)
{
@@ -81,6 +80,7 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdi
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
{
EIGEN_UNUSED_VARIABLE(n);
// Explanations:
// Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
// mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
@@ -102,7 +102,6 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
k = std::min<std::ptrdiff_t>(k, l1/kdiv);
std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
if(_m<m) m = _m & mr_mask;
n = n;
}
template<typename LhsScalar, typename RhsScalar>
@@ -118,14 +117,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
// FIXME (a bit overkill maybe ?)
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
{
c = cj.pmadd(a,b,c);
}
};
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
{
t = b; t = cj.pmul(a,t); c = padd(c,t);
}
@@ -199,7 +198,7 @@ public:
EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
{
for(DenseIndex k=0; k<n; k++)
pstore(&b[k*RhsPacketSize], pset1<RhsPacket>(rhs[k]));
pstore1<RhsPacket>(&b[k*RhsPacketSize], rhs[k]);
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
@@ -270,7 +269,7 @@ public:
EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
{
for(DenseIndex k=0; k<n; k++)
pstore(&b[k*RhsPacketSize], pset1<RhsPacket>(rhs[k]));
pstore1<RhsPacket>(&b[k*RhsPacketSize], rhs[k]);
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
@@ -363,8 +362,8 @@ public:
{
if(Vectorizable)
{
pstore((RealScalar*)&b[k*ResPacketSize*2+0], pset1<RealPacket>(real(rhs[k])));
pstore((RealScalar*)&b[k*ResPacketSize*2+ResPacketSize], pset1<RealPacket>(imag(rhs[k])));
pstore1<RealPacket>((RealScalar*)&b[k*ResPacketSize*2+0], real(rhs[k]));
pstore1<RealPacket>((RealScalar*)&b[k*ResPacketSize*2+ResPacketSize], imag(rhs[k]));
}
else
b[k] = rhs[k];
@@ -475,7 +474,7 @@ public:
EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
{
for(DenseIndex k=0; k<n; k++)
pstore(&b[k*RhsPacketSize], pset1<RhsPacket>(rhs[k]));
pstore1<RhsPacket>(&b[k*RhsPacketSize], rhs[k]);
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
@@ -536,7 +535,7 @@ struct gebp_kernel
ResPacketSize = Traits::ResPacketSize
};
EIGEN_FLATTEN_ATTRIB
EIGEN_DONT_INLINE EIGEN_FLATTEN_ATTRIB
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
{
@@ -598,64 +597,64 @@ struct gebp_kernel
if(nr==2)
{
LhsPacket A0, A1;
RhsPacket B0;
RhsPacket B_0;
RhsPacket T0;
EIGEN_ASM_COMMENT("mybegin2");
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[1*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[1*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadLhs(&blA[2*LhsProgress], A0);
traits.loadLhs(&blA[3*LhsProgress], A1);
traits.loadRhs(&blB[2*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[3*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadRhs(&blB[2*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[3*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadLhs(&blA[4*LhsProgress], A0);
traits.loadLhs(&blA[5*LhsProgress], A1);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[5*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[5*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadLhs(&blA[6*LhsProgress], A0);
traits.loadLhs(&blA[7*LhsProgress], A1);
traits.loadRhs(&blB[6*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[7*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadRhs(&blB[6*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[7*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
EIGEN_ASM_COMMENT("myend");
}
else
{
EIGEN_ASM_COMMENT("mybegin4");
LhsPacket A0, A1;
RhsPacket B0, B1, B2, B3;
RhsPacket B_0, B1, B2, B3;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B0,C0,T0);
traits.madd(A0,B_0,C0,T0);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.madd(A1,B0,C4,B0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.loadRhs(&blB[5*RhsProgress], B1);
@@ -667,9 +666,9 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.madd(A1,B3,C7,B3);
traits.loadLhs(&blA[3*LhsProgress], A1);
traits.loadRhs(&blB[7*RhsProgress], B3);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[8*RhsProgress], B0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[8*RhsProgress], B_0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.loadRhs(&blB[9*RhsProgress], B1);
@@ -682,9 +681,9 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.loadLhs(&blA[5*LhsProgress], A1);
traits.loadRhs(&blB[11*RhsProgress], B3);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[12*RhsProgress], B0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[12*RhsProgress], B_0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.loadRhs(&blB[13*RhsProgress], B1);
@@ -696,8 +695,8 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.madd(A1,B3,C7,B3);
traits.loadLhs(&blA[7*LhsProgress], A1);
traits.loadRhs(&blB[15*RhsProgress], B3);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.madd(A0,B2,C2,T0);
@@ -715,32 +714,32 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsPacket A0, A1;
RhsPacket B0;
RhsPacket B_0;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[1*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[1*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
}
else
{
LhsPacket A0, A1;
RhsPacket B0, B1, B2, B3;
RhsPacket B_0, B1, B2, B3;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B0,C0,T0);
traits.madd(A0,B_0,C0,T0);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.madd(A1,B0,C4,B0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
@@ -827,42 +826,42 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsPacket A0;
RhsPacket B0, B1;
RhsPacket B_0, B1;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[2*RhsProgress], B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[2*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadLhs(&blA[1*LhsProgress], A0);
traits.loadRhs(&blB[3*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadLhs(&blA[2*LhsProgress], A0);
traits.loadRhs(&blB[5*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[6*RhsProgress], B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[6*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadLhs(&blA[3*LhsProgress], A0);
traits.loadRhs(&blB[7*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B1,C1,B1);
}
else
{
LhsPacket A0;
RhsPacket B0, B1, B2, B3;
RhsPacket B_0, B1, B2, B3;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadRhs(&blB[5*RhsProgress], B1);
traits.madd(A0,B2,C2,B2);
@@ -870,8 +869,8 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.madd(A0,B3,C3,B3);
traits.loadLhs(&blA[1*LhsProgress], A0);
traits.loadRhs(&blB[7*RhsProgress], B3);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[8*RhsProgress], B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[8*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadRhs(&blB[9*RhsProgress], B1);
traits.madd(A0,B2,C2,B2);
@@ -880,8 +879,8 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.loadLhs(&blA[2*LhsProgress], A0);
traits.loadRhs(&blB[11*RhsProgress], B3);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[12*RhsProgress], B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[12*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadRhs(&blB[13*RhsProgress], B1);
traits.madd(A0,B2,C2,B2);
@@ -890,7 +889,7 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.loadLhs(&blA[3*LhsProgress], A0);
traits.loadRhs(&blB[15*RhsProgress], B3);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B1,C1,B1);
traits.madd(A0,B2,C2,B2);
traits.madd(A0,B3,C3,B3);
@@ -905,26 +904,26 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsPacket A0;
RhsPacket B0, B1;
RhsPacket B_0, B1;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B1,C1,B1);
}
else
{
LhsPacket A0;
RhsPacket B0, B1, B2, B3;
RhsPacket B_0, B1, B2, B3;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B1,C1,B1);
traits.madd(A0,B2,C2,B2);
traits.madd(A0,B3,C3,B3);
@@ -971,26 +970,26 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsScalar A0;
RhsScalar B0, B1;
RhsScalar B_0, B1;
A0 = blA[k];
B0 = blB[0];
B_0 = blB[0];
B1 = blB[1];
MADD(cj,A0,B0,C0,B0);
MADD(cj,A0,B_0,C0,B_0);
MADD(cj,A0,B1,C1,B1);
}
else
{
LhsScalar A0;
RhsScalar B0, B1, B2, B3;
RhsScalar B_0, B1, B2, B3;
A0 = blA[k];
B0 = blB[0];
B_0 = blB[0];
B1 = blB[1];
B2 = blB[2];
B3 = blB[3];
MADD(cj,A0,B0,C0,B0);
MADD(cj,A0,B_0,C0,B_0);
MADD(cj,A0,B1,C1,B1);
MADD(cj,A0,B2,C2,B2);
MADD(cj,A0,B3,C3,B3);
@@ -1009,12 +1008,7 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index j2=packet_cols; j2<cols; j2++)
{
// unpack B
{
traits.unpackRhs(depth, &blockB[j2*strideB+offsetB], unpackedB);
// const RhsScalar* blB = &blockB[j2*strideB+offsetB];
// for(Index k=0; k<depth; k++)
// pstore(&unpackedB[k*RhsPacketSize], pset1<RhsPacket>(blB[k]));
}
traits.unpackRhs(depth, &blockB[j2*strideB+offsetB], unpackedB);
for(Index i=0; i<peeled_mc; i+=mr)
{
@@ -1032,14 +1026,14 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index k=0; k<depth; k++)
{
LhsPacket A0, A1;
RhsPacket B0;
RhsPacket B_0;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
blB += RhsProgress;
blA += 2*LhsProgress;
@@ -1071,10 +1065,10 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index k=0; k<depth; k++)
{
LhsPacket A0;
RhsPacket B0;
RhsPacket B_0;
traits.loadLhs(blA, A0);
traits.loadRhs(blB, B0);
traits.madd(A0, B0, C0, B0);
traits.loadRhs(blB, B_0);
traits.madd(A0, B_0, C0, B_0);
blB += RhsProgress;
blA += LhsProgress;
}
@@ -1096,8 +1090,8 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index k=0; k<depth; k++)
{
LhsScalar A0 = blA[k];
RhsScalar B0 = blB[k];
MADD(cj, A0, B0, C0, B0);
RhsScalar B_0 = blB[k];
MADD(cj, A0, B_0, C0, B_0);
}
res[(j2+0)*resStride + i] += alpha*C0;
}
@@ -1108,7 +1102,7 @@ EIGEN_ASM_COMMENT("mybegin4");
#undef CJMADD
// pack a block of the lhs
// The travesal is as follow (mr==4):
// The traversal is as follow (mr==4):
// 0 4 8 12 ...
// 1 5 9 13 ...
// 2 6 10 14 ...
@@ -1124,11 +1118,15 @@ EIGEN_ASM_COMMENT("mybegin4");
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct gemm_pack_lhs
{
void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
Index stride=0, Index offset=0)
{
// enum { PacketSize = packet_traits<Scalar>::size };
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
@@ -1136,9 +1134,44 @@ struct gemm_pack_lhs
for(Index i=0; i<peeled_mc; i+=Pack1)
{
if(PanelMode) count += Pack1 * offset;
for(Index k=0; k<depth; k++)
for(Index w=0; w<Pack1; w++)
blockA[count++] = cj(lhs(i+w, k));
if(StorageOrder==ColMajor)
{
for(Index k=0; k<depth; k++)
{
Packet A, B, C, D;
if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
}
}
else
{
for(Index k=0; k<depth; k++)
{
// TODO add a vectorized transpose here
Index w=0;
for(; w<Pack1-3; w+=4)
{
Scalar a(cj(lhs(i+w+0, k))),
b(cj(lhs(i+w+1, k))),
c(cj(lhs(i+w+2, k))),
d(cj(lhs(i+w+3, k)));
blockA[count++] = a;
blockA[count++] = b;
blockA[count++] = c;
blockA[count++] = d;
}
if(Pack1%4)
for(;w<Pack1;++w)
blockA[count++] = cj(lhs(i+w, k));
}
}
if(PanelMode) count += Pack1 * (stride-offset-depth);
}
if(rows-peeled_mc>=Pack2)
@@ -1172,9 +1205,10 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
@@ -1219,9 +1253,10 @@ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;

View File

@@ -78,7 +78,7 @@ static void run(Index rows, Index cols, Index depth,
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
//Index nc = blocking.nc(); // cache block size along the N direction
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -94,15 +94,16 @@ static void run(Index rows, Index cols, Index depth,
std::size_t sizeA = kc*mc;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, sizeA);
RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW);
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, 0);
ei_declare_aligned_stack_constructed_variable(RhsScalar, w, sizeW, 0);
RhsScalar* blockB = blocking.blockB();
eigen_internal_assert(blockB!=0);
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
for(Index k=0; k<depth; k+=kc)
{
const Index actual_kc = std::min(k+kc,depth)-k; // => rows of B', and cols of the A'
const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
// In order to reduce the chance that a thread has to wait for the other,
// let's start by packing A'.
@@ -139,7 +140,7 @@ static void run(Index rows, Index cols, Index depth,
// Then keep going as usual with the remaining A'
for(Index i=mc; i<rows; i+=mc)
{
const Index actual_mc = std::min(i+mc,rows)-i;
const Index actual_mc = (std::min)(i+mc,rows)-i;
// pack A_i,k to A'
pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
@@ -154,9 +155,6 @@ static void run(Index rows, Index cols, Index depth,
#pragma omp atomic
--(info[j].users);
}
ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
ei_aligned_stack_delete(RhsScalar, w, sizeW);
}
else
#endif // EIGEN_HAS_OPENMP
@@ -167,15 +165,16 @@ static void run(Index rows, Index cols, Index depth,
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
LhsScalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(LhsScalar, sizeA) : blocking.blockA();
RhsScalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(RhsScalar, sizeB) : blocking.blockB();
RhsScalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(RhsScalar, sizeW) : blocking.blockW();
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockW, sizeW, blocking.blockW());
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
// (==GEMM_VAR1)
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = std::min(k2+kc,depth)-k2;
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
// OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
// => Pack rhs's panel into a sequential chunk of memory (L2 caching)
@@ -188,7 +187,7 @@ static void run(Index rows, Index cols, Index depth,
// (==GEPP_VAR1)
for(Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,rows)-i2;
const Index actual_mc = (std::min)(i2+mc,rows)-i2;
// We pack the lhs's block into a sequential chunk of memory (L1 caching)
// Note that this block will be read a very high number of times, which is equal to the number of
@@ -200,10 +199,6 @@ static void run(Index rows, Index cols, Index depth,
}
}
if(blocking.blockA()==0) ei_aligned_stack_delete(LhsScalar, blockA, sizeA);
if(blocking.blockB()==0) ei_aligned_stack_delete(RhsScalar, blockB, sizeB);
if(blocking.blockW()==0) ei_aligned_stack_delete(RhsScalar, blockW, sizeW);
}
}
@@ -417,8 +412,8 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);

View File

@@ -42,14 +42,14 @@ struct tribb_kernel;
template <typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder, int UpLo>
int ResStorageOrder, int UpLo, int Version = Specialized>
struct general_matrix_matrix_triangular_product;
// as usual if the result is row major => we transpose the product
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo>
{
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
@@ -63,8 +63,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
};
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo>
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
@@ -83,10 +83,10 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
if(mc > Traits::nr)
mc = (mc/Traits::nr)*Traits::nr;
LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*size;
RhsScalar* allocatedBlockB = ei_aligned_stack_new(RhsScalar, sizeB);
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0);
RhsScalar* blockB = allocatedBlockB + sizeW;
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -96,14 +96,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = std::min(k2+kc,depth)-k2;
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
// note that the actual rhs is the transpose/adjoint of mat
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size);
for(Index i2=0; i2<size; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,size)-i2;
const Index actual_mc = (std::min)(i2+mc,size)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
@@ -112,7 +112,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
// 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
// 3 - after the diagonal => processed with gebp or skipped
if (UpLo==Lower)
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha,
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha,
-1, -1, 0, 0, allocatedBlockB);
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
@@ -120,13 +120,11 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
if (UpLo==Upper)
{
Index j2 = i2+actual_mc;
gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha,
gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha,
-1, -1, 0, 0, allocatedBlockB);
}
}
}
ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
ei_aligned_stack_delete(RhsScalar, allocatedBlockB, sizeB);
}
};
@@ -203,13 +201,13 @@ TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs());
typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs());
typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());

View File

@@ -0,0 +1,142 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Level 3 BLAS SYRK/HERK implementation.
********************************************************************************
*/
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
namespace internal {
/** \internal
  * Generic (fallback) rank-update kernel.
  *
  * Unless a more specialized version is provided for a given scalar type, a
  * rank update is simply the BuiltIn triangular product in which the same
  * scalar type, storage order and conjugation flag are used for both operands.
  */
template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA,
          int ResStorageOrder, int UpLo>
struct general_matrix_matrix_rankupdate
  : public general_matrix_matrix_triangular_product<
      Index,
      Scalar, AStorageOrder, ConjugateA,
      Scalar, AStorageOrder, ConjugateA,
      ResStorageOrder, UpLo, BuiltIn>
{
};
// Try to go to the BLAS specialization.
//
// EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) defines the `Specialized` version of
// general_matrix_matrix_triangular_product for a column-major result whose two
// operands share the scalar type `Scalar`. Its run() is a pure dispatcher:
//   - identical operand pointers  -> general_matrix_matrix_rankupdate
//   - anything else               -> the BuiltIn triangular product
//
// NOTE(review): the rank-update path is selected on pointer equality only;
// RhsStorageOrder, ConjugateRhs and rhsStride are not compared with their lhs
// counterparts -- confirm callers never pass the same pointer for a product that
// is not of the form A * A^T.
#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
               Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
                                      const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
  { \
    typedef general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
      RankUpdateKernel; \
    typedef general_matrix_matrix_triangular_product<Index, \
      Scalar, LhsStorageOrder, ConjugateLhs, \
      Scalar, RhsStorageOrder, ConjugateRhs, \
      ColMajor, UpLo, BuiltIn> \
      BuiltInKernel; \
    \
    /* Distinct buffers: nothing to gain from a rank update, use the generic code. */ \
    if (lhs!=rhs) { \
      BuiltInKernel::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
      return; \
    } \
    RankUpdateKernel::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
  } \
};

// Only the real scalar types are routed through the dispatcher; the complex
// ones are left disabled.
EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
// SYRK for float/double.
//
// EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) specializes
// general_matrix_matrix_rankupdate for the real scalar EIGTYPE and a
// column-major result. run() forwards the update to the BLAS routine MKLFUNC
// with beta set to 1, so the product is accumulated into `res`.
#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
  enum { \
    IsLower = (UpLo&Lower) == Lower, \
    LowUp   = IsLower ? Lower : Upper, \
    conjA   = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
  }; \
  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
                                      const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
  { \
    /* rhs and rhsStride are not read here: only lhs is handed to MKLFUNC. */ \
    MKL_INT n = size; \
    MKL_INT k = depth; \
    MKL_INT lda = lhsStride; \
    MKL_INT ldc = resStride; \
    \
    /* Which triangle of res is updated, and whether lhs is passed transposed. */ \
    char uplo = 'U'; \
    if (IsLower) uplo = 'L'; \
    char trans = 'N'; \
    if (AStorageOrder==RowMajor) trans = 'T'; \
    \
    /* Scalars in the representation expected by MKL. */ \
    MKLTYPE alpha_; \
    MKLTYPE beta_; \
    assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
    assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
    \
    MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
  } \
};
// HERK for complex data
#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \
IsLower = (UpLo&Lower) == Lower, \
LowUp = IsLower ? Lower : Upper, \
conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
}; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
{ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
\
MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
RTYPE alpha_, beta_; \
const EIGTYPE* a_ptr; \
\
/* Set alpha_ & beta_ */ \
/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
alpha_ = alpha.real(); \
beta_ = 1.0; \
/* Copy with conjugation in some cases*/ \
MatrixType a; \
if (conjA) { \
Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
a = mapA.conjugate(); \
lda = a.outerStride(); \
a_ptr = a.data(); \
} else a_ptr=lhs; \
MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
} \
};
// Instantiate the SYRK-backed rank update for the real scalar types.
EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk)
// The HERK-backed complex variants are disabled.
// NOTE(review): cherk takes single-precision (float) alpha/beta, hence RTYPE
// is float for the scomplex line.
//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, float, cherk)
} // end namespace internal
#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H

View File

@@ -0,0 +1,114 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* General matrix-matrix product functionality based on ?GEMM.
********************************************************************************
*/
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
namespace internal {
/**********************************************************************
* This file implements general matrix-matrix multiplication using BLAS
* gemm function via partial specialization of
* general_matrix_matrix_product::run(..) method for float, double,
* std::complex<float> and std::complex<double> types
**********************************************************************/
// gemm specialization
#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
template< \
typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
static void run(Index rows, Index cols, Index depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha, \
level3_blocking<EIGTYPE, EIGTYPE>& blocking, \
GemmParallelInfo<Index>* info = 0) \
{ \
using std::conj; \
\
char transa, transb; \
MKL_INT m, n, k, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX a_tmp, b_tmp; \
EIGTYPE myone(1);\
\
/* Set transpose options */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\
/* Set m, n, k */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
k = (MKL_INT)depth; \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Set lda, ldb, ldc */ \
lda = (MKL_INT)lhsStride; \
ldb = (MKL_INT)rhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Set a, b, c */ \
if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else a = _lhs; \
\
if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} else b = _rhs; \
\
MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
}};
GEMM_SPECIALIZATION(double, d, double, d)
GEMM_SPECIALIZATION(float, f, float, s)
GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c)
} // end namespace internal
#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H

View File

@@ -40,8 +40,8 @@ namespace internal {
* |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
@@ -99,7 +99,7 @@ EIGEN_DONT_INLINE static void run(
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type.
Index alignedStart = first_aligned(res,size);
Index alignedStart = internal::first_aligned(res,size);
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@@ -109,7 +109,7 @@ EIGEN_DONT_INLINE static void run(
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = first_aligned(lhs,size);
const Index lhsAlignmentOffset = internal::first_aligned(lhs,size);
// find how many columns do we have to skip to be aligned with the result (if possible)
Index skipColumns = 0;
@@ -134,7 +134,7 @@ EIGEN_DONT_INLINE static void run(
}
else
{
skipColumns = std::min(skipColumns,cols);
skipColumns = (std::min)(skipColumns,cols);
// note that the skiped columns are processed later.
}
@@ -296,8 +296,8 @@ EIGEN_DONT_INLINE static void run(
* - alpha is always a complex (or converted to a complex)
* - no vectorization
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
@@ -351,7 +351,7 @@ EIGEN_DONT_INLINE static void run(
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type
// if that's not the case then vectorization is discarded, see below.
Index alignedStart = first_aligned(rhs, depth);
Index alignedStart = internal::first_aligned(rhs, depth);
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@@ -361,7 +361,7 @@ EIGEN_DONT_INLINE static void run(
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = first_aligned(lhs,depth);
const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth);
// find how many rows do we have to skip to be aligned with rhs (if possible)
Index skipRows = 0;
@@ -386,7 +386,7 @@ EIGEN_DONT_INLINE static void run(
}
else
{
skipRows = std::min(skipRows,Index(rows));
skipRows = (std::min)(skipRows,Index(rows));
// note that the skiped columns are processed later.
}
eigen_internal_assert( alignmentPattern==NoneAligned

View File

@@ -0,0 +1,127 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* General matrix-vector product functionality based on ?GEMV.
********************************************************************************
*/
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
namespace internal {
/**********************************************************************
* This file implements general matrix-vector multiplication using BLAS
* gemv function via partial specialization of
* general_matrix_vector_product::run(..) method for float, double,
* std::complex<float> and std::complex<double> types
**********************************************************************/
// gemv specialization
/** \internal
  * Generic (fallback) gemv kernel: unless a more specialized version is
  * provided for a given scalar type, it is the BuiltIn
  * general_matrix_vector_product with the same template arguments.
  */
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
         typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product_gemv
  : public general_matrix_vector_product<Index, LhsScalar, LhsStorageOrder, ConjugateLhs,
                                         RhsScalar, ConjugateRhs, BuiltIn>
{
};
// EIGEN_MKL_GEMV_SPECIALIZE(Scalar) defines the `Specialized` versions of
// general_matrix_vector_product for operands that both have scalar type Scalar.
// Each run() is a pure dispatcher:
//   - column-major lhs: with ConjugateLhs set, the BuiltIn implementation is
//     used; otherwise the call goes to general_matrix_vector_product_gemv;
//   - row-major lhs: the call always goes to general_matrix_vector_product_gemv.
//
// The macro definition must end on its last `};` line: a trailing line
// continuation there would make the definition absorb whatever line follows it.
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
  Index rows, Index cols, \
  const Scalar* lhs, Index lhsStride, \
  const Scalar* rhs, Index rhsIncr, \
  Scalar* res, Index resIncr, Scalar alpha) \
{ \
  if (ConjugateLhs) { \
    general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
  } else { \
    general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
  } \
} \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
  Index rows, Index cols, \
  const Scalar* lhs, Index lhsStride, \
  const Scalar* rhs, Index rhsIncr, \
  Scalar* res, Index resIncr, Scalar alpha) \
{ \
    general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
} \
};

EIGEN_MKL_GEMV_SPECIALIZE(double)
EIGEN_MKL_GEMV_SPECIALIZE(float)
EIGEN_MKL_GEMV_SPECIALIZE(dcomplex)
EIGEN_MKL_GEMV_SPECIALIZE(scomplex)
// EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE, MKLTYPE, MKLPREFIX) specializes
// general_matrix_vector_product_gemv for operands of scalar type EIGTYPE and
// forwards the product to MKLPREFIX##gemv.
//   EIGTYPE   : Eigen scalar type
//   MKLTYPE   : type the data pointers are cast to for the MKL call
//   MKLPREFIX : BLAS routine prefix
// beta is set to 1, i.e. the product is accumulated into `res`.
#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
\
static EIGEN_DONT_INLINE void run( \
  Index rows, Index cols, \
  const EIGTYPE* lhs, Index lhsStride, \
  const EIGTYPE* rhs, Index rhsIncr, \
  EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
{ \
  /* An empty product contributes nothing to res (beta is 1). Returning early */ \
  /* also avoids handing a zero leading dimension to BLAS, which requires it */ \
  /* to be at least 1. */ \
  if (rows==0 || cols==0) \
    return; \
\
  MKL_INT m=(MKL_INT)rows, n=(MKL_INT)cols, lda=(MKL_INT)lhsStride, incx=(MKL_INT)rhsIncr, incy=(MKL_INT)resIncr; \
  MKLTYPE alpha_, beta_; \
  const EIGTYPE *x_ptr, myone(1); \
  /* A row-major lhs is passed (conjugate-)transposed, with its dimensions swapped. */ \
  char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
  if (LhsStorageOrder==RowMajor) { \
    m=(MKL_INT)cols; \
    n=(MKL_INT)rows; \
  }\
  assign_scalar_eig2mkl(alpha_, alpha); \
  assign_scalar_eig2mkl(beta_, myone); \
  GEMVVector x_tmp; \
  /* A conjugated rhs is materialized as a contiguous conjugated copy (incx becomes 1). */ \
  if (ConjugateRhs) { \
    Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
    x_tmp=map_x.conjugate(); \
    x_ptr=x_tmp.data(); \
    incx=1; \
  } else x_ptr=rhs; \
  MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
}\
};

EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d)
EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s)
EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z)
EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c)
} // end namespace internal
#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H

View File

@@ -30,7 +30,7 @@ namespace internal {
/** \internal */
inline void manage_multi_threading(Action action, int* v)
{
static int m_maxThreads = -1;
static EIGEN_UNUSED int m_maxThreads = -1;
if(action==SetAction)
{
@@ -85,7 +85,9 @@ template<typename Index> struct GemmParallelInfo
template<bool Condition, typename Functor, typename Index>
void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
{
#ifndef EIGEN_HAS_OPENMP
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
// FIXME the transpose variable is only needed to properly split
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole

View File

@@ -114,7 +114,7 @@ struct symm_pack_rhs
}
// second part: diagonal block
for(Index j2=k2; j2<std::min(k2+rows,packet_cols); j2+=nr)
for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
{
// again we can split vertically in three different parts (transpose, symmetric, normal)
// transpose
@@ -179,7 +179,7 @@ struct symm_pack_rhs
for(Index j2=packet_cols; j2<cols; ++j2)
{
// transpose
Index half = std::min(end_k,j2);
Index half = (std::min)(end_k,j2);
for(Index k=k2; k<half; k++)
{
blockB[count] = conj(rhs(j2,k));
@@ -261,12 +261,12 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
// kc must smaller than mc
kc = std::min(kc,mc);
kc = (std::min)(kc,mc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB + sizeW;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
@@ -276,7 +276,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
for(Index k2=0; k2<size; k2+=kc)
{
const Index actual_kc = std::min(k2+kc,size)-k2;
const Index actual_kc = (std::min)(k2+kc,size)-k2;
// we have selected one row panel of rhs and one column panel of lhs
// pack rhs's panel into a sequential chunk of memory
@@ -289,7 +289,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
// 3 - the panel below the diagonal block => generic packed copy
for(Index i2=0; i2<k2; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,k2)-i2;
const Index actual_mc = (std::min)(i2+mc,k2)-i2;
// transposed packed copy
pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
@@ -297,7 +297,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
}
// the block diagonal
{
const Index actual_mc = std::min(k2+kc,size)-k2;
const Index actual_mc = (std::min)(k2+kc,size)-k2;
// symmetric packed copy
pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
@@ -306,16 +306,13 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
for(Index i2=k2+kc; i2<size; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,size)-i2;
const Index actual_mc = (std::min)(i2+mc,size)-i2;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};
@@ -343,11 +340,10 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB + sizeW;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
@@ -356,22 +352,19 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
for(Index k2=0; k2<size; k2+=kc)
{
const Index actual_kc = std::min(k2+kc,size)-k2;
const Index actual_kc = (std::min)(k2+kc,size)-k2;
pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
// => GEPP
for(Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,rows)-i2;
const Index actual_mc = (std::min)(i2+mc,rows)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};
@@ -407,8 +400,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);

View File

@@ -0,0 +1,291 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
namespace internal {
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
//
// EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) specializes
// product_selfadjoint_matrix for the case where the left operand is the
// self-adjoint one and the result is column-major, forwarding the product to
// MKLPREFIX##symm with side='L' and beta set to 1 (accumulation into `res`).
//   EIGTYPE   : Eigen scalar type
//   MKLTYPE   : type the data pointers are cast to for the MKL call
//   EIGPREFIX : suffix selecting the MatrixX##EIGPREFIX temporary type
//   MKLPREFIX : BLAS routine prefix
#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{ \
  static EIGEN_DONT_INLINE void run( \
    Index rows, Index cols, \
    const EIGTYPE* _lhs, Index lhsStride, \
    const EIGTYPE* _rhs, Index rhsStride, \
    EIGTYPE* res, Index resStride, \
    EIGTYPE alpha) \
  { \
    /* Problem sizes and leading dimensions as handed to the BLAS routine. */ \
    MKL_INT m = (MKL_INT)rows; \
    MKL_INT n = (MKL_INT)cols; \
    MKL_INT lda = (MKL_INT)lhsStride; \
    MKL_INT ldb = (MKL_INT)rhsStride; \
    MKL_INT ldc = (MKL_INT)resStride; \
    \
    /* The self-adjoint operand is on the left; a row-major lhs is read */ \
    /* through its upper triangle instead of the lower one. */ \
    char side = 'L'; \
    char uplo = (LhsStorageOrder==RowMajor) ? 'U' : 'L'; \
    \
    /* Scalars in the representation expected by MKL. */ \
    EIGTYPE unit(1); \
    MKLTYPE alpha_, beta_; \
    assign_scalar_eig2mkl(alpha_, alpha); \
    assign_scalar_eig2mkl(beta_, unit); \
    \
    /* Operand pointers: lhs is used as is; a row-major rhs is first copied */ \
    /* into a column-major temporary. */ \
    MatrixX##EIGPREFIX b_tmp; \
    const EIGTYPE* a = _lhs; \
    const EIGTYPE* b = _rhs; \
    if (RhsStorageOrder==RowMajor) { \
      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
      b_tmp = rhs.adjoint(); \
      b = b_tmp.data(); \
      ldb = b_tmp.outerStride(); \
    } \
    \
    MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
  } \
};
/* Generates the product_selfadjoint_matrix specialization for the complex
 * scalar type EIGTYPE in which the left-hand operand is the selfadjoint one,
 * forwarding the work to the MKL routine MKLPREFIX##hemm with side='L'.
 *
 *   EIGTYPE   - Eigen scalar type
 *   MKLTYPE   - matching MKL scalar type (pointer casts are done with it)
 *   EIGPREFIX - suffix used to form the dynamic matrix typedef MatrixX##EIGPREFIX
 *   MKLPREFIX - prefix used to form the routine name MKLPREFIX##hemm
 *
 * The lhs is conjugated into a_tmp for (ColMajor && ConjugateLhs) or
 * (RowMajor && !ConjugateLhs); uplo becomes 'U' for a row-major lhs.
 * The rhs is used in place only when it is column-major and not conjugated;
 * otherwise a conjugated, adjoint or transposed copy is placed in b_tmp.
 * beta_ is always one, so the routine is asked to add to `res`.
 */
#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \
{ \
char side='L', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
EIGTYPE myone(1); \
\
/* The hemm call below takes no transpose flags; storage order and conjugation are handled through a_tmp/b_tmp */ \
/* Set m, n (the hemm call has no k dimension) */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
\
/* Convert alpha to the MKL scalar type; beta_ is fixed to one */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Leading dimensions start as the caller's strides; lda/ldb are overwritten below when a temporary is used */ \
lda = (MKL_INT)lhsStride; \
ldb = (MKL_INT)rhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Select a (possibly a conjugated copy of the m x m lhs), uplo, and b (possibly a converted copy of the rhs) */ \
if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else a = _lhs; \
if (LhsStorageOrder==RowMajor) uplo='U'; \
\
if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
b = _rhs; } \
else { \
if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \
} else \
if (ConjugateRhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \
} else { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.transpose(); \
} \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} \
\
MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
\
} \
};
// Emit the selfadjoint-lhs specializations: the symm variant for double and
// float (dsymm/ssymm), the hemm variant for dcomplex and scomplex (zhemm/chemm).
EIGEN_MKL_SYMM_L(double, double, d, d)
EIGEN_MKL_SYMM_L(float, float, f, s)
EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c)
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
/* Generates the product_selfadjoint_matrix specialization for scalar type
 * EIGTYPE in which the right-hand operand is the selfadjoint one, forwarding
 * the work to the MKL routine MKLPREFIX##symm with side='R'.
 *
 *   EIGTYPE   - Eigen scalar type
 *   MKLTYPE   - matching MKL scalar type (pointer casts are done with it)
 *   EIGPREFIX - suffix used to form the dynamic matrix typedef MatrixX##EIGPREFIX
 *   MKLPREFIX - prefix used to form the routine name MKLPREFIX##symm
 *
 * With side='R' the selfadjoint rhs is passed as symm's `a` (leading
 * dimension rhsStride) and the general lhs as `b` (leading dimension
 * lhsStride). beta_ is always one, so the routine is asked to add to `res`.
 */
#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
\
  static EIGEN_DONT_INLINE void run( \
    Index rows, Index cols, \
    const EIGTYPE* _lhs, Index lhsStride, \
    const EIGTYPE* _rhs, Index rhsStride, \
    EIGTYPE* res, Index resStride, \
    EIGTYPE alpha) \
  { \
    char side='R', uplo='L'; \
    MKL_INT m, n, lda, ldb, ldc; \
    const EIGTYPE *a, *b; \
    MKLTYPE alpha_, beta_; \
    MatrixX##EIGPREFIX b_tmp; \
    EIGTYPE myone(1);\
\
    /* Set m, n */ \
    m = (MKL_INT)rows; \
    n = (MKL_INT)cols; \
\
    /* Convert alpha to the MKL scalar type; beta_ is fixed to one */ \
    assign_scalar_eig2mkl(alpha_, alpha); \
    assign_scalar_eig2mkl(beta_, myone); \
\
    /* Leading dimensions: a is the rhs, b is the lhs */ \
    lda = (MKL_INT)rhsStride; \
    ldb = (MKL_INT)lhsStride; \
    ldc = (MKL_INT)resStride; \
\
    /* The selfadjoint rhs is passed as is; 'U' replaces 'L' when it is row-major */ \
    if (RhsStorageOrder==RowMajor) uplo='U'; \
    a = _rhs; \
\
    /* A row-major lhs is viewed as an n x m map over the lhs buffer, so the */ \
    /* map must step by lhsStride (not rhsStride); its adjoint() is copied   */ \
    /* into b_tmp, whose outer stride then replaces ldb.                     */ \
    if (LhsStorageOrder==RowMajor) { \
      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
      b_tmp = lhs.adjoint(); \
      b = b_tmp.data(); \
      ldb = b_tmp.outerStride(); \
    } else b = _lhs; \
\
    MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
\
  } \
};
/* Generates the product_selfadjoint_matrix specialization for the complex
 * scalar type EIGTYPE in which the right-hand operand is the selfadjoint one,
 * forwarding the work to the MKL routine MKLPREFIX##hemm with side='R'.
 *
 *   EIGTYPE   - Eigen scalar type
 *   MKLTYPE   - matching MKL scalar type (pointer casts are done with it)
 *   EIGPREFIX - suffix used to form the dynamic matrix typedef MatrixX##EIGPREFIX
 *   MKLPREFIX - prefix used to form the routine name MKLPREFIX##hemm
 *
 * The selfadjoint rhs is passed as hemm's `a` and the general lhs as `b`.
 * The rhs is conjugated into a_tmp for (ColMajor && ConjugateRhs) or
 * (RowMajor && !ConjugateRhs); uplo becomes 'U' for a row-major rhs.
 * The lhs is used in place only when it is column-major and not conjugated;
 * otherwise a conjugated, adjoint or transposed copy is placed in b_tmp.
 * beta_ is always one, so the routine is asked to add to `res`.
 */
#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \
{ \
char side='R', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
EIGTYPE myone(1); \
\
/* Set m, n (the hemm call has no k dimension) */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
\
/* Convert alpha to the MKL scalar type; beta_ is fixed to one */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Leading dimensions: a is the rhs, b is the lhs; lda/ldb are overwritten below when a temporary is used */ \
lda = (MKL_INT)rhsStride; \
ldb = (MKL_INT)lhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Select a (possibly a conjugated copy of the n x n rhs), uplo, and b (possibly a converted copy of the lhs) */ \
if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
a_tmp = rhs.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else a = _rhs; \
if (RhsStorageOrder==RowMajor) uplo='U'; \
\
if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
b = _lhs; } \
else { \
if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
b_tmp = lhs.conjugate(); \
} else \
if (ConjugateLhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
b_tmp = lhs.adjoint(); \
} else { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
b_tmp = lhs.transpose(); \
} \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} \
\
MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
} \
};
// Emit the selfadjoint-rhs specializations: the symm variant for double and
// float (dsymm/ssymm), the hemm variant for dcomplex and scomplex (zhemm/chemm).
EIGEN_MKL_SYMM_R(double, double, d, d)
EIGEN_MKL_SYMM_R(float, float, f, s)
EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c)
} // end namespace internal
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H

View File

@@ -32,8 +32,15 @@ namespace internal {
* the number of load/stores of the result by a factor 2 and to reduce
* the instruction dependency.
*/
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
static EIGEN_DONT_INLINE void product_selfadjoint_vector(
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
struct selfadjoint_matrix_vector_product;
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
struct selfadjoint_matrix_vector_product
{
static EIGEN_DONT_INLINE void run(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
@@ -62,17 +69,15 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
// FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
// if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
// this is because we need to extract packets
const Scalar* EIGEN_RESTRICT rhs = _rhs;
ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);
if (rhsIncr!=1)
{
Scalar* r = ei_aligned_stack_new(Scalar, size);
const Scalar* it = _rhs;
for (Index i=0; i<size; ++i, it+=rhsIncr)
r[i] = *it;
rhs = r;
rhs[i] = *it;
}
Index bound = std::max(Index(0),size-8) & 0xfffffffe;
Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
if (FirstTriangular)
bound = size - bound;
@@ -87,14 +92,14 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
Scalar t1 = cjAlpha * rhs[j+1];
Packet ptmp1 = pset1<Packet>(t1);
Scalar t2 = 0;
Scalar t2(0);
Packet ptmp2 = pset1<Packet>(t2);
Scalar t3 = 0;
Scalar t3(0);
Packet ptmp3 = pset1<Packet>(t3);
size_t starti = FirstTriangular ? 0 : j+2;
size_t endi = FirstTriangular ? j : size;
size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti);
size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
@@ -150,7 +155,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
Scalar t1 = cjAlpha * rhs[j];
Scalar t2 = 0;
Scalar t2(0);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
res[j] += cjd.pmul(internal::real(A0[j]), t1);
for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
@@ -160,10 +165,8 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
}
res[j] += alpha * t2;
}
if(rhsIncr!=1)
ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size);
}
};
} // end namespace internal
@@ -198,8 +201,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
@@ -211,45 +214,33 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
EvalToDest ? dest.data() : static_dest.data());
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
bool freeDestPtr = false;
ResScalar* actualDestPtr;
if(EvalToDest)
actualDestPtr = dest.data();
else
if(!EvalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualDestPtr=static_dest.data())==0)
{
freeDestPtr = true;
actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
}
MappedDest(actualDestPtr, dest.size()) = dest;
}
bool freeRhsPtr = false;
RhsScalar* actualRhsPtr;
if(UseRhs)
actualRhsPtr = const_cast<RhsScalar*>(rhs.data());
else
if(!UseRhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = rhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualRhsPtr=static_rhs.data())==0)
{
freeRhsPtr = true;
actualRhsPtr = ei_aligned_stack_new(RhsScalar,rhs.size());
}
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
}
internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
(
lhs.rows(), // size
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
@@ -259,11 +250,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
);
if(!EvalToDest)
{
dest = MappedDest(actualDestPtr, dest.size());
if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
}
if(freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, rhs.size());
}
};

View File

@@ -0,0 +1,110 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
namespace internal {
/**********************************************************************
* This file implements selfadjoint matrix-vector multiplication using BLAS
**********************************************************************/
// Generic symv/hemv entry point. For any scalar type without a dedicated
// specialization it simply inherits the BuiltIn version of
// selfadjoint_matrix_vector_product.
template<typename Scalar, typename Index,
         int StorageOrder, int UpLo,
         bool ConjugateLhs, bool ConjugateRhs>
struct selfadjoint_matrix_vector_product_symv
  : public selfadjoint_matrix_vector_product<Scalar, Index, StorageOrder, UpLo,
                                             ConjugateLhs, ConjugateRhs, BuiltIn>
{
};
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
enum {\
IsColMajor = StorageOrder==ColMajor \
}; \
if (IsColMajor == ConjugateLhs) {\
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
} else {\
selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
}\
} \
}; \
EIGEN_MKL_SYMV_SPECIALIZE(double)
EIGEN_MKL_SYMV_SPECIALIZE(float)
EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
/* Generates the selfadjoint_matrix_vector_product_symv specialization for
 * scalar type EIGTYPE, implemented by a single call to MKLFUNC.
 *
 *   EIGTYPE - Eigen scalar type
 *   MKLTYPE - matching MKL scalar type (pointer casts are done with it)
 *   MKLFUNC - routine to call (a ?symv or ?hemv entry point)
 *
 * The result vector is addressed with unit increment and beta is always one,
 * so the routine is asked to add to `res`. When ConjugateRhs is set, a
 * conjugated contiguous copy of the rhs is passed instead of `_rhs`.
 */
#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
{ \
  typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector; \
  \
  static EIGEN_DONT_INLINE void run( \
    Index size, const EIGTYPE* lhs, Index lhsStride, \
    const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
  { \
    enum { \
      IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
      IsLower = UpLo == Lower ? 1 : 0 \
    }; \
    \
    /* Row-major storage swaps the triangle name: 'U' when both flags agree, 'L' otherwise. */ \
    char uplo = (int(IsRowMajor) == int(IsLower)) ? 'U' : 'L'; \
    \
    /* Size, leading dimension and vector increments as MKL_INT. */ \
    MKL_INT n = size; \
    MKL_INT lda = lhsStride; \
    MKL_INT incx = rhsIncr; \
    MKL_INT incy = 1; \
    \
    /* Scalars in MKL representation; beta is fixed to one. */ \
    MKLTYPE alpha_, beta_; \
    const EIGTYPE one(1); \
    assign_scalar_eig2mkl(alpha_, alpha); \
    assign_scalar_eig2mkl(beta_, one); \
    \
    /* Use the rhs in place unless it has to be conjugated first. */ \
    SYMVVector x_conj; \
    const EIGTYPE* x = _rhs; \
    if (ConjugateRhs) { \
      Map<const SYMVVector, 0, InnerStride<> > x_view(_rhs,size,1,InnerStride<>(incx)); \
      x_conj = x_view.conjugate(); \
      x = x_conj.data(); \
      incx = 1; \
    } \
    \
    MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)res, &incy); \
  } \
};
// Emit the wrappers: dsymv/ssymv for double/float, zhemv/chemv for
// dcomplex/scomplex.
EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv)
EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv)
EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
} // end namespace internal
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H

Some files were not shown because too many files have changed in this diff Show More