Compare commits


438 Commits

Author SHA1 Message Date
Gael Guennebaud
fe0350cf1b bump 2012-02-06 16:39:26 +01:00
Gael Guennebaud
99c694623a fix a dozen warnings with MSVC, and get rid of some useless throw() 2012-02-06 15:57:51 +01:00
Gael Guennebaud
6ad48c5d92 fix conjugation in packet_lhs 2012-02-05 18:18:38 +01:00
Gael Guennebaud
4ed87c59c7 Update the PARDISO interface to match other sparse solvers.
- Add support for Upper or Lower inputs.
- Add support for sparse RHS
- Remove transposed cases, remove ordering method interface
- Add full access to PARDISO parameters
2012-02-04 14:20:56 +01:00
Gael Guennebaud
1763f86364 add the recent setFromTriplets() feature in the manual 2012-02-04 10:44:07 +01:00
Gael Guennebaud
fe85b7ebc6 fix several const qualifier issues: double ones, meaningless ones, some missing ones, etc.
(note that const qualifiers are set by internal::nested)
2012-02-03 23:18:26 +01:00
Gael Guennebaud
bc7b251cd9 fix compilation errors with ICC 2012-02-03 23:16:52 +01:00
Gael Guennebaud
a594d7ffd7 stop disabling this legitimate warning, recall that in the following the const on FooRef is really meaningless:
typedef Foo& FooRef;
const FooRef foo;
2012-02-03 23:16:11 +01:00
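
For illustration, a minimal sketch of why that const is meaningless (hypothetical IntRef stands in for FooRef):

    typedef int& IntRef;
    int x = 0;
    const IntRef r = x; // 'const' applied to a reference type is ignored:
                        // the reference itself can never be reseated anyway
    r = 1;              // compiles fine, x is now 1
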
Gael Guennebaud
ad4aa7873f remove unused variables 2012-02-03 13:30:48 +01:00
Gael Guennebaud
fd4aefadcd fix ctest -D Foo with MSVC 2008 2012-02-03 10:50:49 +01:00
Zuiquan
a64407f086 Enable Eigen to compile in a 'pure C/C++' GCC environment (no inline assembly or asm directives). Required if we want to use Eigen with Adobe Alchemy. 2012-02-02 12:05:02 +01:00
Gael Guennebaud
13abb37721 shut up the floating point underflow warning for this specific unit test 2012-01-31 23:18:17 +01:00
Gael Guennebaud
7002639844 the default ctor made no sense because of the const reference member 2012-01-31 23:12:04 +01:00
Gael Guennebaud
13e46ad847 add missing return *this 2012-01-31 23:11:13 +01:00
Gael Guennebaud
9a954d29ec rm non standard and useless overloads of is_arithmetic for long long 2012-01-31 21:45:03 +01:00
Gael Guennebaud
634fedaf68 proper C++ casting 2012-01-31 18:56:25 +01:00
Gael Guennebaud
10cd52350f fix a few warnings: change of sign and missing return statement 2012-01-31 13:05:44 +01:00
Gael Guennebaud
9c86ee2695 fix static inline versus inline static issues (the former is the correct order) 2012-01-31 12:58:52 +01:00
Gael Guennebaud
8d6e394b06 workaround "empty macro argument" warning 2012-01-31 12:46:14 +01:00
Gael Guennebaud
670e3af5a8 add .data() member to Diagonal<> 2012-01-31 12:44:59 +01:00
Gael Guennebaud
18e3ac0f0d fix some compilation errors with ICC and -strict-ansi 2012-01-31 09:14:01 +01:00
Gael Guennebaud
87138075da add the possibility to assemble a SparseMatrix object from a random list of triplets that may contain duplicated elements. It works in linear time, with O(1) re-allocations. 2012-01-28 11:13:59 +01:00
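
For illustration, a minimal sketch of the triplet-assembly workflow this entry describes (illustrative example; duplicated entries are summed):

    #include <Eigen/Sparse>
    #include <vector>

    int main() {
      std::vector<Eigen::Triplet<double> > triplets;
      triplets.push_back(Eigen::Triplet<double>(0, 0, 1.0));
      triplets.push_back(Eigen::Triplet<double>(1, 2, 2.0));
      triplets.push_back(Eigen::Triplet<double>(0, 0, 3.0)); // duplicate of (0,0), summed to 4.0
      Eigen::SparseMatrix<double> A(3, 3);
      A.setFromTriplets(triplets.begin(), triplets.end());   // linear time, O(1) re-allocations
      return 0;
    }
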
Gael Guennebaud
fc2d85d139 fix memory leak in SuperLUSupport 2012-01-27 10:07:09 +01:00
Gael Guennebaud
27d222d23e honor nested types in dense * sparse 2012-01-27 09:39:36 +01:00
Jitse Niesen
ed244e9c1a Document that JacobiSVD also handles complex matrices.
Thanks to 'Jazzdude' for noting this on IRC.
2012-01-26 13:16:50 +00:00
Gael Guennebaud
0251bb6c1d add missing inline keyword (linking issue) 2012-01-26 10:53:42 +01:00
Gael Guennebaud
65d5311c68 SimplicialCholesky: the shift offset must be real, and fix a comparison issue for complexes 2012-01-26 10:34:45 +01:00
Gael Guennebaud
d9f5840f7a simple compilation fix 2012-01-26 08:52:20 +01:00
Gael Guennebaud
a108216af1 fix bug #410: fix a possible out of range access in EigenSolver 2012-01-25 19:02:31 +01:00
Christoph Hertzberg
362fcabc44 Check for positive definiteness in SimplicialLLT 2012-01-14 22:34:18 +01:00
Gael Guennebaud
5e4dfa4a09 fix a nesting type issue in Sparse/TriangularView 2012-01-25 18:16:48 +01:00
Gael Guennebaud
606e204f6d fix bug #406: Using OpenMP and Eigen causes infinite loop/deadlock
(transplanted from fd52daae87)
2012-01-25 17:42:22 +01:00
Gael Guennebaud
c68616b3b5 fix warning with gcc 4.6 2012-01-25 15:48:50 +01:00
Gael Guennebaud
87f2af5930 workaround ICC compilation error with -strict-ansi 2012-01-25 15:45:01 +01:00
Gael Guennebaud
d615d39af0 determine windows version from major.minor only, the patch number is irrelevant. 2012-01-23 21:56:46 +01:00
Gael Guennebaud
0d03492e1e std::isfinite is non standard 2012-01-23 21:49:00 +01:00
Gael Guennebaud
ee9f3e34b0 LLT: improve rankUpdate to support downdates,
LDLT: add the missing info() function,
improve unit testing of rankUpdate()
2012-01-23 17:28:23 +01:00
Abraham Bachrach
039408cd66 added functions to allow for cwise min/max operations with scalar argument (bug #400).
added function for array.min(), array.max(), matrix.cwiseMin(), matrix.cwiseMax().

The matrix.cwiseMin/Max functions required the definition of the ConstantReturnType typedef.
However, it wasn't defined until after MatrixCwiseBinaryOps was included in Eigen/src/SparseCore/SparseMatrixBase.h,
so I moved those includes after the definition of the typedefs.

tests for both the regular and scalar min/max functions were added as well
2012-01-11 11:00:30 -05:00
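
For illustration, a hedged sketch of the scalar min/max API added by this entry:

    #include <Eigen/Dense>

    int main() {
      Eigen::Vector3d v(1.0, 5.0, -2.0);
      Eigen::Vector3d m = v.cwiseMax(0.0).cwiseMin(3.0); // coefficient-wise clamp to [0, 3]
      Eigen::Array3d  a = v.array().max(0.0).min(3.0);   // the same through the Array API
      return 0;
    }
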
Gael Guennebaud
238999045c optimize the packing of lhs blocks for matrix-matrix products => significant speedup for small products 2012-01-21 19:34:28 +01:00
Jitse Niesen
0e1e0a2a58 Make sure that now-fixed assert is not triggered. 2012-01-19 14:30:44 +00:00
Keir Mierle
274f8a0947 Fix broken asserts revealed by Clang. 2012-01-18 15:03:27 -08:00
Gael Guennebaud
589cc627f8 fix one more VC10 ICE 2012-01-18 17:45:22 +01:00
Gael Guennebaud
db8f528737 fix VC10 ICE 2012-01-18 17:42:13 +01:00
Jitse Niesen
d6bf9f848a Correct description of rankUpdate() in quick reference guide.
Thanks to Sameer Agarwal for pointing out this mistake.
(transplanted from bc0fc5d21e)
2012-01-09 12:57:11 +00:00
Keir Mierle
2d4fee0b40 Fix out-of-range int constant in 4x4 inverse.
(transplanted from 45bcad41b4)
2012-01-05 23:15:09 -08:00
Gael Guennebaud
e7ef367db1 suppress unused variable warnings 2012-01-06 09:02:06 +01:00
Gael Guennebaud
bdee0c9baa set the default number of iteration to the size of the problem 2011-12-27 16:38:05 +01:00
Gael Guennebaud
15ea999f84 pushed the previous one too fast 2011-12-23 23:22:31 +01:00
Gael Guennebaud
901bcdd2a8 the previous test works for Dynamic sizes only 2011-12-23 23:16:43 +01:00
Gael Guennebaud
96a18ef230 add a reconstruction test 2011-12-23 23:15:08 +01:00
Gael Guennebaud
8171adb7ff fix bug #398, the quaternion returned by slerp was not always normalized,
add a proper unit test for slerp
2011-12-23 22:39:32 +01:00
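
For illustration, a minimal slerp sketch touching the behavior fixed here (the result is expected to be unit-norm):

    #include <Eigen/Geometry>
    #include <iostream>

    int main() {
      Eigen::Quaterniond a(Eigen::AngleAxisd(0.3, Eigen::Vector3d::UnitZ()));
      Eigen::Quaterniond b(Eigen::AngleAxisd(1.2, Eigen::Vector3d::UnitY()));
      Eigen::Quaterniond q = a.slerp(0.5, b);
      std::cout << q.norm() << std::endl; // prints 1 once the fix is in
      return 0;
    }
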
Gael Guennebaud
67ae94f3a2 fix compilation of sparse_basic unit test for complexes 2011-12-23 09:41:14 +01:00
Gael Guennebaud
e3e39ea26d suppress an 'unused variable' warning 2011-12-22 14:06:16 +01:00
Gael Guennebaud
2c03e6fccc evaluate 1D sparse expressions into SparseVector and make the sparse operator<< and dot honor nested types 2011-12-22 14:01:06 +01:00
Gael Guennebaud
7f04845023 fix assignment of a row-major sparse vector to a column major sparse one 2011-12-22 11:53:47 +01:00
Gael Guennebaud
e4cea957df fix bug #391: prune was for compressed format only, now it also turns the matrix into compressed form 2011-12-20 18:37:24 +01:00
Gael Guennebaud
7e866c447f fix bug #391: improper stream output for uncompressed mode, also avoid double debugging outputs for column major matrices 2011-12-20 18:31:00 +01:00
Gael Guennebaud
6f92b75874 add aliasing test for sparse*sparse product 2011-12-20 18:10:22 +01:00
Gael Guennebaud
50d756b9ea fix bug #394: innerVector::nonZeros() was broken for uncompressed mode 2011-12-20 18:10:02 +01:00
Gael Guennebaud
15d781b64c we need to define EXTRACT_ALL to YES to get doxygen to see the whole hierarchy. Exclude internal::* from the doc. 2011-12-20 10:25:54 +01:00
Gael Guennebaud
fcc966b40d workaround doxygen limitation to follow the base class of PlainObjectBase 2011-12-19 22:13:11 +01:00
Gael Guennebaud
33e52a3943 rm local fill-in ratio estimation (was broken sometimes) 2011-12-16 16:29:46 +01:00
Gael Guennebaud
732a50d043 implement a more optimistic heuristic to predict the nnz of a sparse*sparse product 2011-12-16 15:59:44 +01:00
Gael Guennebaud
40c0f3af57 fix bug #396: add a static assertion on the storage order of a sparse-sparse coeff-wise binary op 2011-12-15 19:23:20 +01:00
Jitse Niesen
3db6455896 Remove evaluators for 3.1 release.
We plan to re-instate them when we branch 3.2 (see bug #388).
2011-12-14 21:23:43 +00:00
Gael Guennebaud
0308c11849 remove a file that was not intended to be committed 2011-12-13 08:42:48 +01:00
Jitse Niesen
1e7712771e Remove asserts that eigenvalue computation has converged (bug #354). 2011-12-12 17:17:38 +00:00
Gael Guennebaud
1aa6c7f122 fix sparse insertion example 2011-12-11 17:18:14 +01:00
Gael Guennebaud
d738bedc5b remove redundant declaration (fix compilation with clang 3.0) 2011-12-11 11:45:03 +01:00
Gael Guennebaud
f60e6f5ee8 s/compressed()/isCompressed() 2011-12-10 23:08:10 +01:00
Gael Guennebaud
594fd2d11d Cholmod: add support for uncompressed SparseMatrix objects 2011-12-10 22:53:31 +01:00
Gael Guennebaud
9d7d634897 add cholmod_support unit tests 2011-12-10 19:32:17 +01:00
Gael Guennebaud
f35708d2e0 enforce weak linking of xerbla 2011-12-10 19:30:36 +01:00
Gael Guennebaud
105e170d8b trivial compilation fix 2011-12-10 16:17:12 +01:00
Gael Guennebaud
2600ba1731 feature 297: s/intersectionPoint/pointAt, fix documentation, add a unit test 2011-12-10 12:17:42 +01:00
Andy Somerville
c06ae325a4 feature 297: add ParametrizedLine::intersectionPoint() and intersectionParam()
-> intersection() is deprecated
2011-12-10 11:58:38 +01:00
Igor Krivenko
36457178f9 bug #352: properly cast constants 2011-12-09 23:38:41 +01:00
Gael Guennebaud
d400a6245e fix compilation with EIGEN_NO_DEBUG 2011-12-09 23:42:39 +01:00
Gael Guennebaud
38277e8a9b feature 319: fix LDLT::rankUpdate for complex/upper, simplify the algorithm, update copyrights 2011-12-09 23:08:38 +01:00
Tim Holy
2d7c3eea53 feature 319: Add update and downdate functionality to LDLT 2011-12-09 21:04:44 +01:00
Gael Guennebaud
37f304a2e6 add a "using MKL" documentation page, add a minimal documentation of PARDISO wrapper classes, refine a bit the EIGEN_USE_* logic 2011-12-09 16:52:37 +01:00
Sebastian Lipponer
fff25a4b46 Fix MSVC integer overflow warning 2011-12-09 10:39:10 +00:00
Gael Guennebaud
57c6bfba08 add missing CMakeLists.txt 2011-12-09 10:53:12 +01:00
Gael Guennebaud
081abb701d add user defined CXX and LINKER flag cmake variables for the unit tests 2011-12-09 10:50:13 +01:00
Gael Guennebaud
10447a7b57 mv blas.h to src/misc such that it would be possible to use any blas library,
however, this requires some more work:
 - add const qualifiers in the declarations of blas.h
 - add the possibility to add a suffix to blas function names
2011-12-09 10:40:35 +01:00
Gael Guennebaud
43cdd242d0 - split and rename defined tokens to enable the use of BLAS/Lapack/VML/etc
- include MKL headers outside the Eigen namespace.
2011-12-09 10:06:49 +01:00
karturov
015c331252 Intel(R) MKL support added.
* * *
License disclaimer changed to BSD license for MKL_support.h
* * *
Pardiso support fixed, test added.
blas/lapack tests fixed: Scalar parameter was added in Cholesky, product_matrix_vector_triangular renamed to triangular_matrix_vector_product.
* * *
PARDISO test was added physically.
2011-12-05 14:52:21 +07:00
Gael Guennebaud
e270a5656a fix min/max clash with clang's header by including fstream beforehand 2011-12-08 23:27:10 +01:00
Gael Guennebaud
86bb20c431 remove dead code 2011-12-08 23:22:28 +01:00
Gael Guennebaud
e36a4c880a suppress deprecated warning when compiling legacy tests 2011-12-08 23:15:07 +01:00
Gael Guennebaud
06450882ab add missing CMakeLists.txt in Splines 2011-12-08 23:12:39 +01:00
Jitse Niesen
dd232e30b0 Document QuaternionBase, minor doc improvements.
* Document class QuaternionBase so that docs for members are displayed.
* Remove obsolete \redstar refering to Array module
* Fix typo in Constants.h
* Document EIGEN_NO_AUTOMATIC_RESIZING
2011-12-08 14:22:06 +00:00
Gael Guennebaud
a1fa05f14e improve compiler name and version detection 2011-12-07 13:20:52 +01:00
Gael Guennebaud
a0da96e2f4 fix detection of ICC version 2011-12-06 22:07:20 +01:00
Gael Guennebaud
80f8ed9f9c improve compiler and architecture detection 2011-12-06 19:54:34 +01:00
Thomas Capricelli
c3ad1f9382 eigen_gen_docs: dont try to update permissions on server 2011-12-06 15:55:20 +01:00
Gael Guennebaud
6ec0af6dc7 Added tag 3.1.0-alpha1 for changeset e017f798eb 2011-12-06 15:53:46 +01:00
Gael Guennebaud
e017f798eb bump 2011-12-06 15:53:17 +01:00
Hauke Heibel
accae638b2 Fixed a typo. 2011-12-06 15:42:05 +01:00
Gael Guennebaud
84cf1b5b1d fix QuaternionBase::cast.
It did not work with clang, and I'm unsure how it worked for gcc/msvc since QuaternionBase was introduced
2011-12-05 14:13:59 +01:00
Gael Guennebaud
9ca673daed fix compilation with clang 2011-12-05 12:50:43 +01:00
Gael Guennebaud
dd504d6aae fix bug #223: SparseMatrix::Flags no longer encode triangularness information 2011-12-05 10:17:09 +01:00
Gael Guennebaud
59576014a9 fix bug #373: compilation error with clang 2.9 when exceptions are disabled (cannot reproduce with clang 3.0 or 3.1) 2011-12-05 09:44:25 +01:00
Gael Guennebaud
b60624dc2a fix bug #384: add a static assertion on the Index type which has to be signed 2011-12-04 22:14:53 +01:00
Gael Guennebaud
82f9aa194d fix bug #294: add a diagonal() method to SparseMatrix (const) 2011-12-04 21:49:21 +01:00
Gael Guennebaud
69966e90e1 fix bug #221: remove the dense to SparseVector conversion ctor. 2011-12-04 21:15:46 +01:00
Gael Guennebaud
5dc9650f11 fix bug #281: replace csparse macros by template functions 2011-12-04 19:15:23 +01:00
Hauke Heibel
a8a2bf3b5a Added docs to the spline module. 2011-12-04 18:44:01 +01:00
Gael Guennebaud
9bd902ed9c fix bug #341: trisolve on MappedSparseMatrix 2011-12-04 14:57:43 +01:00
Gael Guennebaud
9353bbac4a fix bug #356: fix TriangularView::InnerIterator for unit diagonals 2011-12-04 14:39:24 +01:00
Gael Guennebaud
32917515df make the accessors to internal sparse storage part of the public API and remove their "_" prefix. 2011-12-04 12:19:26 +01:00
Gael Guennebaud
1cdbae62db add SparseVector::ReverseInnerIterator 2011-12-04 09:56:40 +01:00
Gael Guennebaud
91e392a042 add ReverseInnerIterators to loop over the elements in reverse order,
and partly fix bug #356 (issue in trisolve for upper-column-major)
2011-12-03 23:49:37 +01:00
Gael Guennebaud
a09cc5d4c0 fix bug #282: add the possibility to shift the diagonal coefficients via a linear function. 2011-12-03 18:26:08 +01:00
Gael Guennebaud
c861e05181 fix matrix names in the insertion example 2011-12-03 18:14:51 +01:00
Gael Guennebaud
9ae606866c Eigen2Support: import some fixes from the 3.0 branch (MSVC fix) 2011-12-03 17:45:07 +01:00
Gael Guennebaud
950eeab4d7 RandomSetter: turns the matrix into compressed form before the filling 2011-12-03 17:35:21 +01:00
Gael Guennebaud
c0e36516f3 add a command to fix the permission of the uploaded documentation 2011-12-03 11:18:20 +01:00
Gael Guennebaud
3f56de2628 improve sparse manual 2011-12-03 10:26:00 +01:00
Gael Guennebaud
e759086dcd improve documentation of some sparse related classes 2011-12-02 19:02:49 +01:00
Gael Guennebaud
4ca89f32ed Sparse matrix insertion:
- automatically turn a SparseMatrix to uncompressed mode when calling insert(i,j).
 - now coeffRef inserts a new element when it does not already exist
2011-12-02 19:00:16 +01:00
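
For illustration, a hedged sketch of the insertion semantics this entry describes:

    #include <Eigen/Sparse>

    int main() {
      Eigen::SparseMatrix<double> A(10, 10);
      A.insert(2, 3) = 4.0;    // creates the coefficient; the matrix may switch to uncompressed mode
      A.coeffRef(2, 3) += 1.0; // existing element: updated in place
      A.coeffRef(5, 5) = 2.0;  // missing element: now inserted on the fly
      A.makeCompressed();      // back to compressed storage once filling is done
      return 0;
    }
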
Gael Guennebaud
f10bae74e8 - move CompressedStorage and AmbiVector into internal namespace
- remove innerVectorNonZeros(j) => use innerVector(j).nonZeros()
2011-12-02 10:00:24 +01:00
Jitse Niesen
a0bcaa88af Extend tutorial page on broadcasting to reflect recent changes. 2011-12-01 21:16:07 +00:00
Gael Guennebaud
b85bcd91bf remove GSL dependency in the unit tests 2011-12-01 18:17:19 +01:00
Gael Guennebaud
7aaae9d6df remove useless blas reference code 2011-12-01 18:10:12 +01:00
Gael Guennebaud
3a4c78b588 add code for band triangular problems:
- currently available from the BLAS interface only
 - and for vectors only
2011-12-01 18:06:28 +01:00
Gael Guennebaud
9fdb6a2ead output error messages in blas unit tests 2011-12-01 18:04:01 +01:00
Hauke Heibel
b00a33bc70 Integrated spline class and simple spline fitting 2011-11-25 14:53:40 +01:00
Gael Guennebaud
49d652c600 fix assignment from uncompressed 2011-11-30 21:55:54 +01:00
Gael Guennebaud
6b8d6887ac bug fix in SparseSelfAdjointTimeDenseProduct for empty rows or columns 2011-11-30 19:39:20 +01:00
Gael Guennebaud
00d4a360ba bug fix in SparseView::incrementToNonZero 2011-11-30 19:31:11 +01:00
Gael Guennebaud
d1b54ecfa3 add more support for uncompressed mode 2011-11-30 19:24:43 +01:00
Gael Guennebaud
cda397b117 cleaning pass on the sparse modules:
- remove outdated/deprecated code
 - improve a bit the documentation
2011-11-28 16:36:37 +01:00
Gael Guennebaud
2d621d235d fix alignment computation in Block and MapBase such that aligned means aligned on 16 bytes and nothing else
(transplanted from dcb36e3d49)
2011-11-28 13:43:10 +01:00
Marc Glisse
a2810aa32f bug #383 - another c++11-user-defined-literal fix 2011-11-27 15:27:25 -05:00
Marc Glisse
8107b3da75 bug #383 - EIGEN_ASM_COMMENT broken in C++11
this is due to the new user-defined literals syntax.
2011-11-26 17:55:18 -05:00
Gael Guennebaud
f56316f7ed add two alternative solutions to the problem of fixed size members 2011-11-25 13:46:48 +01:00
Gael Guennebaud
70206ab1e1 draft of the new sparse manual reflecting the new sparse module 2011-11-24 17:32:30 +01:00
Gael Guennebaud
57d1ccb2dc fix compilation of doc (broken by changeset bc6d78982f - General tightening/testing of vectorwise ops)
2011-11-24 17:30:55 +01:00
Gael Guennebaud
2d4fe54b73 fix CG example 2011-11-24 08:19:13 +01:00
Gael Guennebaud
01b4b6e456 improve accuracy of 3x3 direct eigenvector extraction 2011-11-23 22:43:40 +01:00
Gael Guennebaud
be9b87377f typo 2011-11-23 08:30:10 +01:00
Jitse Niesen
63dcdb65fd Install eigen3.pc in default directory if pkgconfig not found (bug #358). 2011-11-22 17:30:35 +00:00
Benoit Jacob
ffe6d1f901 Alignment fixes:
* Fix AlignedBit computation for Plain Objects
 * use it for the conditional alignment of operator new
 * only overload new in PlainObjectBase, don't overload again in Matrix and Array
2011-11-22 09:04:31 -05:00
Gael Guennebaud
f278a3eaba stop full-pivoting LU only if the pivot is exactly 0 2011-11-22 09:18:54 +01:00
Benoit Jacob
bc6d78982f Bugs 157 and 377 - General tightening/testing of vectorwise ops:
* add lots of static assertions making it very explicit when all these ops
are supposed to work:
** all ops require the rhs vector to go in the right direction
** all ops already require that the lhs and rhs are of the same kind
(matrix vs vector) otherwise we'd have to do complex work
** multiplicative ops (introduced in Kibeom's patch) are restricted to arrays, if only because for matrices they could be ambiguous.

* add a new test, vectorwiseop.cpp.

* these compound-assign operators used to be implemented with for loops:

   for(Index j=0; j<subVectors(); ++j)
     subVector(j).array() += other.derived().array();

This didn't seem to be needed; they are now implemented with expressions, like operator+ and operator- already were.
2011-11-18 11:10:27 -05:00
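
For illustration, a minimal sketch of the vectorwise ops this entry tightens (hypothetical values, assuming the post-patch semantics):

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd m = Eigen::MatrixXd::Ones(3, 4);
      Eigen::Vector3d v(1.0, 2.0, 3.0);
      m.colwise() += v;        // ok: a column vector added to each column
      // m.rowwise() += v;     // wrong direction: caught by the new static assertions
      Eigen::ArrayXXd a = m.array();
      Eigen::Array3d  w(1.0, 2.0, 3.0);
      a.colwise() *= w;        // multiplicative ops: arrays only
      return 0;
    }
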
Kibeom Kim
de22ad117c bug #157 - Implemented *= /= * / operations for VectorwiseOp (e.g. mat.colwise()) 2011-11-17 17:57:45 -05:00
Jitse Niesen
08c0edae86 Move EIGEN_USING_MATRIX_TYPEDEFS macros to Eigen2Support. 2011-11-16 14:32:50 +00:00
Dennis Schridde
db36e4204f [Geometry/AlignedBox] New typedefs, like for Core/Matrix
Includes 1-4 and dynamic sized boxes for int, float and double type.
Also changes the tests to use these typedefs.
2011-11-09 22:12:28 +01:00
Gael Guennebaud
8fbbbe7521 fix some include paths 2011-11-16 09:27:38 +01:00
Gael Guennebaud
cb2f1944e2 add the new module headers 2011-11-12 15:22:35 +01:00
Gael Guennebaud
53fa851724 move sparse solvers from unsupported/ to main Eigen/ and remove the "not stable yet" warning 2011-11-12 14:11:27 +01:00
Gael Guennebaud
dcb66d6b40 fix ei_add_property 2011-11-12 10:54:16 +01:00
Gael Guennebaud
3e4a68cc60 optimize vectorized reductions by peeling the loop:
- x2 for squaredNorm() on double
 - peeling the loop with a peeling factor of 4 leads to even better perf
   for large vectors (e.g., >64) but it makes it more difficult to keep good performance on smaller ones.
2011-11-12 09:19:48 +01:00
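
For illustration, a scalar analogue of the peeling idea (the real code operates on SIMD packets; a hypothetical factor-2 version shown):

    // two independent accumulators let the additions overlap in the pipeline
    double squared_norm(const double* x, int n) {
      double acc0 = 0.0, acc1 = 0.0;
      int i = 0;
      for (; i + 1 < n; i += 2) {   // peeled main loop
        acc0 += x[i] * x[i];
        acc1 += x[i + 1] * x[i + 1];
      }
      for (; i < n; ++i)            // leftover tail
        acc0 += x[i] * x[i];
      return acc0 + acc1;
    }
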
Gael Guennebaud
c110abb7d2 fix performance issue with SPMV 2011-11-11 06:04:31 +01:00
Gael Guennebaud
9d82a7e204 merge with hauke/eigen-cdash-improvements branch 2011-11-09 21:19:05 +01:00
Dennis Schridde
3a82aa1133 [Core/Matrix] Fix: Clear the right typedef macro 2011-11-09 12:25:55 +00:00
Gael Guennebaud
fb3aa7220f reimplement abs2 not to use std::norm which is incredibly slow. 2011-11-08 22:42:51 +01:00
Jitse Niesen
45a6bb34c3 Add simple example on how to compute Cholesky decomposition. 2011-11-07 17:14:06 +00:00
Marton Danoczy
f422668d39 Patches to support ARM NEON with Clang 3.0 and LLVM-GCC 2011-11-04 16:37:10 +01:00
Benoit Jacob
1b98b73472 Refactor force-inlining macros and use EIGEN_ALWAYS_INLINE to force inlining of the integer overflow helpers, whose non-inlining caused major performance problems, see the mailing list thread 'Significant perf regression probably due to bug #363 patches' 2011-11-06 16:27:41 -05:00
Benoit Jacob
aa3e420df5 Add test for Matrix(x, y) ctor static assert added in previous changeset 2011-11-06 00:44:04 -04:00
Benoit Jacob
ab3f138b23 In the Matrix constructor taking (rows, cols), statically assert that the types are integer.
The 2D vector ctor taking (x, y) is not concerned.
2011-11-05 23:56:48 -04:00
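
For illustration, a minimal sketch of the distinction this entry draws:

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd m(3, 4);          // (rows, cols): integer arguments required
      Eigen::Vector2d v(1.5, -0.5);     // (x, y): scalar coefficients, exempt from the assert
      // Eigen::MatrixXd bad(3.0, 4.0); // would now trigger the static assertion
      return 0;
    }
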
Gael Guennebaud
478de03bd8 fix a couple of warnings in the unit tests 2011-11-05 23:30:49 +01:00
Gael Guennebaud
cdd3e85060 Automatically produce a tgz archive of the documentation. 2011-11-05 21:59:36 +01:00
Gael Guennebaud
b4d1d4a2e0 completely remove EIGEN_BUILD_BLAS_LAPACK option 2011-11-05 13:26:53 +01:00
Gael Guennebaud
c5ddaf0c87 fix compilation 2011-11-05 10:54:05 +01:00
Gael Guennebaud
1de769d122 remove deprecated assert 2011-11-04 14:42:54 +01:00
Gael Guennebaud
05de3dddca use the runtest.sh script only if /bin/bash exists 2011-11-03 17:37:25 +01:00
Gael Guennebaud
94d87abbdb fix fftw cmake stuff 2011-11-03 15:33:42 +01:00
Jitse Niesen
a594ac3966 Allow for more iterations in SelfAdjointEigenSolver (bug #354).
Add an assert to guard against using eigenvalues that have not converged.
Add call to info() in tutorial example to cover non-convergence.
2011-11-02 14:18:20 +00:00
Gael Guennebaud
57207239f3 Mention that the axis in AngleAxis have to be normalized. 2011-11-01 09:40:51 +01:00
Jan Oberländer
fa7c08a831 bug #365 - Rename B0 in GeneralBlockPanelKernel.h to avoid name clash
with termios.h on POSIX systems.
2011-10-31 10:44:09 -04:00
Benoit Jacob
0cf2a05f3e bug #365 - Add test for non-usage of B0 2011-10-31 10:44:06 -04:00
Benoit Jacob
9df2f5c923 bug #369 - Quaternion alignment is broken
The problem was two-fold:
 * missing aligned operator new
 * Flags were mis-computed, the Aligned constant was misused
2011-10-31 09:23:41 -04:00
Benoit Jacob
0609dbeec6 fix more variable-set-but-not-used warnings on gcc 4.6 2011-10-31 00:51:36 -04:00
Benoit Jacob
6a1caf0351 Fix some unused-variable warnings with GCC 4.6 2011-10-30 23:55:20 -04:00
Adolfo Rodriguez Tsourouksdissian
4477843bdd bug #206 - part 4: Removes heap allocations from JacobiSVD and its preconditioners 2011-10-30 23:55:20 -04:00
Adolfo Rodriguez Tsourouksdissian
5e431779f3 bug #206 - part 3: Reimplement FullPivHouseholderQR<T>::matrixQ() using ReturnByValue 2011-03-08 19:04:31 +01:00
Adolfo Rodriguez Tsourouksdissian
7bf0e8cd82 bug #206 - part 2: For HouseholderSequence objects, added non-allocating versions of evalTo() and applyThisOnTheRight/Left that take additional working vector parameters. 2011-10-30 23:55:16 -04:00
Benoit Jacob
bca18a13ea The most important inline keyword ever? Without it, gcc failed to inline this function, which is called by all matrix constructors... 2011-10-25 20:45:26 -04:00
Gael Guennebaud
d7e70edfb3 remove the MSVC specific blas/lapack option 2011-10-24 13:40:01 +02:00
Gael Guennebaud
e44c19d1cc hopefully this workaround of cmake bug #9220 works for MSVC too 2011-10-24 13:36:49 +02:00
Gael Guennebaud
1ddf88060b update sparse*sparse product: the default is now a conservative algorithm preserving symbolic non-zeros. The previous behavior with auto pruning of small values is available via: (A*B).pruned() or (A*B).pruned(ref) or (A*B).pruned(ref,eps) 2011-10-24 11:44:53 +02:00
Gael Guennebaud
a997dacc67 mark deprecated sparse solvers as such. 2011-10-24 09:51:02 +02:00
Gael Guennebaud
39d4585bff add the possibility to disable deprecated warnings (useful for deprecated unit tests!) 2011-10-24 09:40:37 +02:00
Gael Guennebaud
5d43b4049d factorize solving with guess 2011-10-24 09:33:24 +02:00
Gael Guennebaud
70df09b76d move DynamicSparseMatrix to SparseExtra 2011-10-24 09:31:33 +02:00
Gael Guennebaud
a2d414f568 move the blas.h header to blas/ and remove declaration of function returning a complex 2011-10-19 16:29:43 +02:00
Benoit Jacob
de69129f56 forgot inline keyword 2011-10-17 08:49:59 -04:00
Benoit Jacob
16b638c159 Throw std::bad_alloc even when exceptions are disabled, by doing new int[size_t(-1)].
Don't throw exceptions on aligned_malloc(0) (just because malloc's retval is null doesn't mean error, if size==0).
Remove EIGEN_NO_EXCEPTIONS option, use only compiler standard defines. Either exceptions are enabled or they aren't.
2011-10-17 08:44:44 -04:00
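
For illustration, a hedged sketch of the trick this entry describes (the helper mirrors the idea, not necessarily Eigen's exact code):

    #include <cstddef>

    void throw_std_bad_alloc() {
      // a request of size_t(-1) ints can never be satisfied, so the runtime
      // raises std::bad_alloc (or aborts) even when user code is compiled
      // with exceptions disabled
      std::size_t huge = static_cast<std::size_t>(-1);
      new int[huge];
    }
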
Benoit Jacob
dcbc985a28 bug #363 - add test for integer overflow in size computations 2011-10-16 16:12:19 -04:00
Benoit Jacob
739559b08a bug #363 - check for integer overflow in size=rows*cols computations 2011-10-16 16:12:19 -04:00
Benoit Jacob
0c6055c285 bug #363 - check for integer overflow in byte-size computations 2011-10-16 16:12:19 -04:00
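
For illustration, a generic sketch of the overflow-guard pattern (hypothetical helper, not Eigen's exact code):

    #include <cstddef>
    #include <stdexcept>

    std::size_t checked_size(std::size_t rows, std::size_t cols) {
      // guard the multiplication itself; checking the already-wrapped result is too late
      if (rows != 0 && cols > std::size_t(-1) / rows)
        throw std::length_error("rows*cols overflows size_t");
      return rows * cols;
    }
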
Gael Guennebaud
c1170d2e93 update the decomposition catalogue 2011-10-14 21:21:38 +02:00
Gael Guennebaud
3fce43a704 add a basic ILU preconditioner 2011-10-11 20:41:43 +02:00
Gael Guennebaud
a5761d6dd7 fix sparse tri-solve for full matrices 2011-10-11 20:35:52 +02:00
Gael Guennebaud
15cb4f5b09 extend BiCGSTAB to arbitrary rhs 2011-10-11 19:53:18 +02:00
Gael Guennebaud
21d27c6f71 add proper bicgstab unit test 2011-10-11 19:38:36 +02:00
Gael Guennebaud
cd3c2451b6 add a unit test for permutation applied to sparse objects 2011-10-11 13:45:27 +02:00
Gael Guennebaud
3172749f32 refactor sparse solving unit tests 2011-10-11 11:32:26 +02:00
Gael Guennebaud
4f237f035c extend SimplicialCholesky for sparse rhs, and add determinant 2011-10-11 11:31:12 +02:00
Gael Guennebaud
5dc8458293 extend CG for multiple right hand sides 2011-10-11 11:29:50 +02:00
Gael Guennebaud
b94c00226f make it compatible with Diagonal<> 2011-10-11 11:28:13 +02:00
Gael Guennebaud
ae9c96a32d fix assignment to a set of sparse inner vectors 2011-10-10 16:16:37 +02:00
Gael Guennebaud
4e7f38ffc7 fix nesting 2011-10-09 22:19:01 +02:00
Gael Guennebaud
e97879857b DiagonalPrecond: fix potential segfault in case the diagonal contains explicit zeros 2011-10-09 22:17:37 +02:00
Gael Guennebaud
1beb8a6564 add a generic unit test for sparse SPD problems 2011-10-09 21:50:02 +02:00
Gael Guennebaud
2fc1b58cd2 split SimplicialCholesky into SimplicialLLt and SimplicialLDLt classes and add specific factor access functions 2011-10-09 21:45:55 +02:00
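
For illustration, a minimal solve with the split factorization (written with the modern spelling SimplicialLLT; on this branch the class was named SimplicialLLt):

    #include <Eigen/Sparse>

    int main() {
      Eigen::SparseMatrix<double> A(2, 2); // tiny SPD example
      A.insert(0, 0) = 4.0;
      A.insert(1, 1) = 3.0;
      A.makeCompressed();
      Eigen::VectorXd b(2);
      b << 1.0, 2.0;
      Eigen::SimplicialLLT<Eigen::SparseMatrix<double> > llt(A);
      Eigen::VectorXd x = llt.solve(b); // x == (0.25, 0.667)
      return 0;
    }
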
Hauke Heibel
e1dec359ba Configured unsupported/test/mpreal/*.* as CRLF files. 2011-10-04 11:57:49 +02:00
Hauke Heibel
b96d0bd240 Added a flag to build blas/lapack. 2011-10-04 11:23:55 +02:00
Gael Guennebaud
683ea3c93f fix SuperLU when the solver is called multiple times 2011-09-27 18:30:53 +02:00
Jitse Niesen
ac3ad9c1e7 Convert tabs to spaces. 2011-09-27 15:47:04 +01:00
Jitse Niesen
17c321617d Fix bug #286: Infinite loop in JacobiSVD with denormals 2011-09-27 14:25:02 +01:00
Bram de Jong
961a825b97 Add method which returns worst time (and make some methods const). 2011-09-26 14:39:23 +01:00
Gael Guennebaud
9bba0e7ba1 clean sparse LU tests 2011-09-24 17:15:37 +02:00
Gael Guennebaud
b2988375e8 fix a couple of issues in SuperLU support (memory and determinant) 2011-09-24 14:20:31 +02:00
Gael Guennebaud
6799fabba9 port umfpack support to new API 2011-09-24 14:19:39 +02:00
Gael Guennebaud
d8ae978b65 fix some compilation issues 2011-09-23 16:28:26 +02:00
Gael Guennebaud
823b2105b6 fix atan2 when tmp4==0 2011-09-22 17:34:25 +02:00
Gael Guennebaud
b0adbfbae7 BiCGSTAB does not like starting from 0... 2011-09-21 18:08:08 +02:00
Gael Guennebaud
c331c092d5 no comment 2011-09-21 14:20:41 +02:00
Gael Guennebaud
7301f4345c quick workaround of MSVC9's ICE in pset1 2011-09-21 14:18:41 +02:00
Gael Guennebaud
83563dee3c find MacPorts' umfpack/cholmod 2011-09-21 10:28:09 +02:00
Gael Guennebaud
ebfed5a512 Enable incomplete BLAS/Lapack builds when no fortran compiler has been found.
Works here with gcc. Hopefully this will work for MSVC too.
2011-09-21 10:27:38 +02:00
Gael Guennebaud
1d796acb05 fix status after initialization 2011-09-20 18:45:50 +02:00
Gael Guennebaud
5d1836b182 accept both STL and Eigen's containers for reserve() 2011-09-20 02:04:03 +02:00
Jitse Niesen
e0a6ce50dd Typo in geometry tutorial. 2011-09-19 21:57:26 +01:00
Jitse Niesen
2092b45d0d Bug fix for matrix1 * matrix2 * scalar1 * scalar2.
See report on http://forum.kde.org/viewtopic.php?f=74&t=96947 .
2011-09-19 15:07:19 +01:00
Chen-Pang He
16b13596a6 mainly enhance MatrixLogarithm's performance for RealScalar != double 2011-09-17 21:00:55 +08:00
Gael Guennebaud
edf4c4b217 add support for macosx 2011-09-17 10:57:27 +02:00
Gael Guennebaud
9053729d68 add a bi conjugate gradient stabilized solver 2011-09-17 10:54:14 +02:00
Gael Guennebaud
f4122e9f94 add tan, acos, asin 2011-09-14 08:35:54 +02:00
Jitse Niesen
6b006772f1 Fix LDLT::solve() if matrix singular but solution exists (bug #241).
Clarify this in docs and add regression test.
2011-09-11 06:30:53 +01:00
Jitse Niesen
59b83c14fd Write page on template and typename keywords in C++.
After yet another question on the forum, I decided to write something on this
common issue. Now we just need to link to this and get people to read it.
Thanks to mattb on the forum for some links. Caveat: IANALL (I am not a
language lawyer).
2011-09-10 09:18:18 +01:00
Gael Guennebaud
3e7aaadb1d fix bench_gemm 2011-09-09 10:36:20 +02:00
Gael Guennebaud
d52d8e4a53 reactivate the sorting in the experimental sparse-sparse product 2011-09-08 13:43:32 +02:00
Gael Guennebaud
7706bafcfd add the possibility to reserve room for inner vector in SparseMatrix 2011-09-08 13:42:54 +02:00
Jitse Niesen
7898281b2b Put docs for unsupported modules in right place.
Doxygen was confused by the unsupported modules being partly in the doc/
directory, instead of completely in unsupported/doc/ . Thus, the link to
the unsupported modules on the server did not work (I think this manifested
itself after doxygen was upgraded on the server).
2011-09-07 04:19:12 +01:00
Jitse Niesen
b38d3b360e Define log2() on FreeBSD (fixes bug #343). 2011-09-06 06:52:04 +01:00
Gael Guennebaud
f1d98aad1b add atan2 support in AutoDiff and remove superfluous std:: specializations 2011-09-05 17:47:58 +02:00
Gael Guennebaud
063042bca3 Merged in trevorw/eigen (pull request PR-7) 2011-09-05 10:55:49 +02:00
Jitse Niesen
477d3e5726 Update docs of PlainObjectBase::Map(); fixes bug #335.
Also fix some typos.
2011-09-03 15:18:21 +01:00
Jitse Niesen
a2feb6f3c7 Add defensive assert to MatrixExponential, 2011-09-03 04:58:06 +01:00
Chen-Pang He
dd598ef8ce improve efficiency by avoiding exception handling 2011-09-02 00:15:02 +08:00
Trevor Wennblom
6b31aa4bd1 resolve pkgconfig destination - #338 2011-08-30 19:15:16 -05:00
Jitse Niesen
7ee084f82f Leverage triangular square root in matrix log. 2011-08-25 07:42:32 +01:00
Jitse Niesen
c01ed935dd Split code for (quasi)triangular matrices from MatrixSquareRoot.
This way, (quasi)triangular matrices can avoid the costly Schur decomposition.
2011-08-25 07:42:21 +01:00
Chen-Pang He
8ddd1e390b fix: <ctime> is necessary for srand(time(NULL)) 2011-08-24 18:26:38 +08:00
Gael Guennebaud
8414be739b fix bug #330: Index to int conversion warning 2011-08-23 11:02:10 +02:00
Gael Guennebaud
b3f5fbbd9a oops EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION now performs full specialization,
no need for the typename keywords
2011-08-22 10:48:04 +02:00
Gael Guennebaud
b85c89c313 fix bug #262: Compilation error of stdvector_overload test with GCC 4.6
Now our aligned allocator is automatically activated only when the user
did not specify an allocator (or specified the default std::allocator).
2011-08-22 10:12:10 +02:00
Jitse Niesen
9bf4d709e4 Fix failures in redux test caused by underflow in .prod() test. 2011-08-21 00:51:15 +01:00
Jitse Niesen
9e667e28f5 Add coverage for long double to matrix_exponential test. 2011-08-21 00:20:29 +01:00
Chen-Pang He
6d7a32231d add compatibility with long double 2011-08-20 12:33:51 +08:00
Gael Guennebaud
ea4a1960f0 mv the mpreal copy in its own folder 2011-08-19 15:08:29 +02:00
Gael Guennebaud
79ad55a901 update to latest mpreal and fix a min/max issue in mpreal.h 2011-08-19 15:03:45 +02:00
Gael Guennebaud
42e2578ef9 the min/max macros to detect unprotected min/max were undefined by some std header,
so let's declare them afterwards and do the respective fixes ;)
2011-08-19 14:18:05 +02:00
Gael Guennebaud
5734ee6df4 add the possibility to specialize assign_impl and still call the default implementations.
(yes I know this change will be deprecated as soon as the evaluators are in shape, but I need this now)
2011-08-18 10:19:25 +02:00
Gael Guennebaud
ca7d3dca79 fix linking issue 2011-08-12 22:38:53 +02:00
Gael Guennebaud
f162f7c323 fix a numerical issue in the direct 3x3 eigenvector extraction 2011-08-08 10:46:26 +02:00
Thomas Capricelli
a660e6425c fix a bug where some rotations were not initialized.
They actually were in the original minpack code; this is a bug introduced
by our migration.
Reported on #322 and
http://forum.kde.org/viewtopic.php?f=74&t=96197#p201158
2011-08-04 05:02:04 +02:00
Thomas Capricelli
5748d3c96f wa2 was computed twice because of a confusion between changesets
746c787a76 and ee0e39284c.
Reported on forum:
http://forum.kde.org/viewtopic.php?f=74&t=96197#p201158
2011-08-04 03:27:01 +02:00
Jitse Niesen
b12522f696 Remove unnecessary template keywords (breaks compilation under MSVC).
Thanks to Hauke for finding this.
2011-07-28 13:55:56 +01:00
Hauke Heibel
3431c052c6 Improved compilation errors for Transform initialization/assignment with different numeric types. 2011-07-28 09:35:17 +02:00
Gael Guennebaud
3a2cabc275 compilation fix with conjugate_gradient_solve_retval_with_guess 2011-07-26 14:43:20 +02:00
Gael Guennebaud
51f706b916 add the possibility to configure the preconditioner 2011-07-26 09:22:18 +02:00
Gael Guennebaud
66fa6f39a2 add a naive IdentityPreconditioner 2011-07-26 09:17:18 +02:00
Gael Guennebaud
80b1d1371d add a conjugate gradient solver 2011-07-26 09:04:10 +02:00
Gael Guennebaud
8fa7e92e77 fix sparse selfadjoint time dense such that the other triangular part is not used at all 2011-07-26 09:02:41 +02:00
Gael Guennebaud
97ac0fd192 fix eigen2 support min/max garbage 2011-07-22 11:37:41 +02:00
Gael Guennebaud
e8313364c1 simplify a bit the 2x2 direct eigenvalue solver 2011-07-22 11:21:43 +02:00
Gael Guennebaud
47a2bca89f integrate Hauke's 2x2 direct symmetric eigenvalues solver 2011-07-22 09:43:14 +02:00
Gael Guennebaud
26d7dad138 add a computeDirect method to SelfAdjointEigenSolver for fast eigen decomposition 2011-07-21 19:07:52 +02:00
Gael Guennebaud
22bff949c8 protect calls to min and max with parentheses to make Eigen compatible with default windows.h
(transplanted from 49b6e9143e)
2011-07-21 11:19:36 +02:00
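
For illustration, the parenthesis trick this entry refers to:

    #include <algorithm>
    // #include <windows.h> // defines function-like min/max macros by default

    int smaller(int a, int b) {
      return (std::min)(a, b); // the parentheses block macro expansion:
                               // a function-like macro only expands when
                               // its name is directly followed by '('
    }
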
Gael Guennebaud
d4bd8bddb5 fix bug #320 (pretty gdb printer on mingw) 2011-07-20 11:15:42 +02:00
Hauke Heibel
705023fd85 Translation * RotationBase now returns an isometric transformation. 2011-07-19 11:13:40 +02:00
Gael Guennebaud
3fb65734ab fix triangular unit test: it only accepts small matrices 2011-07-19 10:45:42 +02:00
Gael Guennebaud
22cc2b727b fix trmv unit test 2011-07-19 10:44:44 +02:00
Gael Guennebaud
38a4e3053d fix LLT rank one update for "upper" hermitian matrices 2011-07-19 10:09:43 +02:00
Gael Guennebaud
0d02182ae8 add an "InvalidInput" enum, used by the SuperLU interface 2011-07-18 13:37:41 +02:00
Gael Guennebaud
a8f66fec65 add the possibility to configure the maximal matrix size in the unit tests 2011-07-12 14:41:00 +02:00
Gael Guennebaud
bdb545ce3b enable installation of blas and lapack libs 2011-07-11 17:02:09 +02:00
Gael Guennebaud
5fdebc2fa5 fix bug #316 - SelfAdjointEigenSolver::compute does not handle matrices of size (1,1) correctly 2011-07-09 07:15:14 +02:00
Thomas Capricelli
08074843ac fix a few warnings reported by clang 2011-07-07 22:20:04 +02:00
Gael Guennebaud
c52268c649 suppress polluting EMPTY macro defined by SuperLU 2011-07-07 16:42:51 +02:00
Gael Guennebaud
2489c81562 add new interface to SuperLU 2011-07-07 14:19:42 +02:00
Gael Guennebaud
c98cd5e564 fix constness of intersection methods (bug #309) 2011-06-27 13:15:01 +02:00
Jitse Niesen
0b308e79c4 Add DenseStorage specializations for dynamic size with MaxSize = 0 (bug #288).
This is necessary for instantiations like Matrix<float,Dynamic,Dynamic,0,0,0>.
2011-06-24 13:47:11 +01:00
Jitse Niesen
16db255333 Fix compilation of cholesky rank update test. 2011-06-24 13:41:23 +01:00
Thomas Capricelli
9b52fe0432 fix typo in doc for ParametrizedLine 2011-06-23 00:36:24 +02:00
Gael Guennebaud
3ecf7e8f6e add a KroneckerProduct module (unsupported) from Kolja Brix and Andreas Platen materials. 2011-06-22 14:39:11 +02:00
Gael Guennebaud
7aabce7c76 rm confusing sentence 2011-06-17 09:46:05 +02:00
Tim Holy
16a2d896bc Relatively straightforward changes to wording of documentation, focusing particularly on the sparse and (to a lesser extent) geometry pages. 2011-06-20 22:47:58 -05:00
Tim Holy
4a95badf74 A first tiny test commit: fix a spelling error in the documentation. 2011-06-19 14:39:19 -05:00
Gael Guennebaud
2f32e48517 New feature: add rank one update in Cholesky decomposition 2011-06-20 15:05:50 +02:00
Gael Guennebaud
a55c27a15f fix documentation of norm 2011-06-18 08:30:34 +02:00
Zach Ploskey
642d452921 Suggest placing Eigen directory in system include path. 2011-06-17 15:46:50 -07:00
Zach Ploskey
e3491beb48 Fixed a few typos and cleaned up some language. 2011-06-17 15:42:15 -07:00
Benoit Jacob
a871f3cdb8 adapt test to the change reverting normalize() to returning void 2011-06-15 10:00:43 -04:00
Benoit Jacob
aedccbf52f back out 842881cfb1 2011-06-15 09:59:10 -04:00
Benoit Jacob
d2673d89bd add test for normalize() and normalized() 2011-06-15 00:30:46 -04:00
Andy Somerville
842881cfb1 bug #298 - let normalize() return a reference to *this 2011-06-15 00:30:11 -04:00
Gael Guennebaud
40287d2fd9 remove the use of non standard long long 2011-06-14 10:56:47 +02:00
Gael Guennebaud
f82b3ea241 fix aligned_allocator::allocate interface 2011-06-14 08:50:25 +02:00
Thomas Capricelli
cf04a7c682 fix typo in constant name 2011-06-12 23:54:28 +02:00
Gael Guennebaud
6d3dee1b66 introduce a smart_copy internal function and fix sparse matrices with non POD scalar type 2011-06-09 19:04:06 +02:00
Jitse Niesen
8c8ab9ae10 Implement matrix logarithm + test + docs.
Currently, test matrix_function_1 fails due to bug #288.
2011-06-07 14:44:43 +01:00
Jitse Niesen
a6d42e28fe Decouple MatrixFunction and MatrixFunctionAtomic
in preparation for implementation of matrix log.
2011-06-07 14:40:27 +01:00
Jitse Niesen
86ca35ccff Fix and test MatrixSquareRoot for 1-by-1 matrices. 2011-06-07 14:32:16 +01:00
Gael Guennebaud
91fe1507d1 Sparse: more fixes regarding long int as index type 2011-06-07 11:28:16 +02:00
Gael Guennebaud
421ece38e1 Sparse: fix long int as index type in simplicial cholesky and other decompositions 2011-06-06 10:17:28 +02:00
Jitse Niesen
7a61a564ef Fix snippets for operator|| and && by adding pair of parens. 2011-06-03 11:17:08 +01:00
Gael Guennebaud
5bc4abc45e fix compilation with MinGW 2011-06-01 12:16:21 +02:00
Gael Guennebaud
562d3ea91d forgot to include this file in previous commit 2011-06-01 10:49:36 +02:00
Gael Guennebaud
35c1158ee3 add boolean || and && operators 2011-05-31 22:17:34 +02:00
Gael Guennebaud
b495203310 update URL 2011-05-31 19:07:15 +02:00
Gael Guennebaud
5830f90983 add read/write routines for sparse matrices in the Market format 2011-05-31 18:58:04 +02:00
Jitse Niesen
9d6fdbced7 Fix truncated instructions for printers.py
... as noted by kp0987 on forum
2011-05-30 16:15:11 +01:00
Gael Guennebaud
5b71d44e18 fix bug #278: geometry tutorial
(transplanted from 3cd1641dac)
2011-05-28 22:12:15 +02:00
Gael Guennebaud
9464745385 do not directly call std::ceil 2011-05-28 16:46:38 +02:00
Gael Guennebaud
7b46d7ed0f finish fixing bug #270: we have to use EIGEN_ALIGN_STATICALLY and not EIGEN_DONT_ALIGN_STATICALLY... 2011-05-28 11:38:53 +02:00
Jitse Niesen
d23845c4cc Fix typo ('using namespace' instead of 'using'). 2011-05-26 09:52:36 +01:00
Gael Guennebaud
87ac09daa8 Simplify the use of custom scalar types: the rule is to never call a standard math function via std:: directly, but rather to put a 'using std::foo;' before and simply call foo:
using std::max;
max(a,b);
2011-05-25 08:41:45 +02:00
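
For illustration, an expanded sketch of that rule (hypothetical custom::Scalar; unqualified max() resolves via argument-dependent lookup):

    #include <algorithm>

    namespace custom {
      struct Scalar { double v; };
      inline Scalar max(Scalar a, Scalar b) { return a.v < b.v ? b : a; }
    }

    template <typename T>
    T largest(T a, T b) {
      using std::max;   // never write std::max(a, b) directly...
      return max(a, b); // ...so this picks std::max or an ADL overload like custom::max
    }
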
Gael Guennebaud
5541bcb769 bug #225: add a unit test for memory leak 2011-05-23 14:20:49 +02:00
Gael Guennebaud
117d17ee58 bug #271: fix copy/paste mistakes in doc
(transplanted from 145b9cad63101ee46924d446fa8b08ffb48c7f3a)
2011-05-23 13:39:26 +02:00
Gael Guennebaud
46bee5682f clean up the previous patch a bit (ctor vs static_cast and a few bits) 2011-05-23 13:34:04 +02:00
David H. Bailey
074b067624 fix implicit scalar conversions (needed to support fancy scalar types, see bug #276) 2011-05-23 11:20:13 +02:00
Gael Guennebaud
7209d6a126 fix gemv_static_vector_if on architectures that cannot align on the stack (e.g., ARM NEON) 2011-05-21 22:15:11 +02:00
Gael Guennebaud
96464f8563 clean several other assertion checking tests 2011-05-20 09:59:15 +02:00
Gael Guennebaud
501bc602ec fix vectorization_logic when EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 2011-05-19 21:52:40 +02:00
Gael Guennebaud
f2837aebc4 NEON: fix plset 2011-05-18 21:12:08 +02:00
Gael Guennebaud
8170ef0b2d add unit test for plset 2011-05-18 21:11:03 +02:00
Gael Guennebaud
7f2a88c91f NEON: disable unaligned assertion checking for non vectorized types 2011-05-18 14:11:40 +02:00
Gael Guennebaud
85c137ccd4 NEON: fix ploaddup 2011-05-18 08:15:47 +02:00
Gael Guennebaud
179d42bb2b fix bug #267: alloca is not aligned on arm 2011-05-17 21:30:12 +02:00
Gael Guennebaud
d4fd298fbb Autodiff: fix scalar - active_scalar 2011-05-14 22:38:41 +02:00
Jitse Niesen
9a06055870 Store light-weight objects in evaluators by value.
This resolves failure in unit test caused by dying temporaries.
2011-05-13 14:05:59 +01:00
Gael Guennebaud
a34a216e82 AutoDiff: add one missing operator- version 2011-05-12 23:40:19 +02:00
Gael Guennebaud
3de2f4b75a AutoDiff: fix most of bug #234 (missing operators, used old internal math function interface, etc) 2011-05-12 23:36:33 +02:00
Gael Guennebaud
ae3b6cc324 AutoDiff: fix unary operator- 2011-05-12 22:27:51 +02:00
Jitse Niesen
e22a523021 Remove Eigen::internal::sqrt(), see bug #264. 2011-05-12 16:52:56 +01:00
John Tytgat
0aa7425f15 fix bug #260: broken Qt support for Transform
(transplanted from 84c8b6d5c5)
2011-05-11 22:31:36 +02:00
Jitse Niesen
0c463a21c4 Forgot to 'hg add' example file in last commit. 2011-05-10 09:59:58 +01:00
Jitse Niesen
d7e3c949be Implement and document MatrixBase::sqrt(). 2011-05-09 22:20:20 +01:00
Jitse Niesen
dac4bb640a Fix compilation error under GCC 4.5.
That version is stricter in forcing function prototype and definition
to match.
2011-05-09 13:57:06 +01:00
Jitse Niesen
837db08cbd Add test for sqrt() on complex Arrays.
From Gael's dashboard output of matrix_square_root test, I suspect the
test committed here may fail on old gcc.
2011-05-09 10:17:41 +01:00
Jitse Niesen
6e1573f66a Implement square root for real matrices via Schur. 2011-05-08 22:18:37 +01:00
Jitse Niesen
6b4e215710 Implement matrix square root for complex matrices.
I hope to implement the real case soon, but it's a bit more
complicated due to the 2-by-2 blocks in the real Schur decomposition.
2011-05-07 22:57:46 +01:00
Jitse Niesen
0896c6d97d Get rid of wrong "subscript above bounds" warning (bug #149). 2011-05-07 18:44:11 +01:00
Gael Guennebaud
4e7e5d09e1 s/n=n/EIGEN_UNUSED_VARIABLE(n) 2011-05-06 21:29:19 +02:00
Gael Guennebaud
fb76452cbc add missing .data() members to MatrixWrapper and ArrayWrapper 2011-05-06 21:15:05 +02:00
Gael Guennebaud
97b6d26f5b fix compilation on ARM NEON (missing AlignedOnScalar) 2011-05-06 09:03:48 +02:00
Thomas Capricelli
883219041f better fix for gcc 4.6.0 / ptrdiff_t, as suggested by Benoit 2011-05-05 18:48:18 +02:00
Thomas Capricelli
a18a1be42d Fix compilation with gcc-4.6.0, patch provided by Anton Gladky <gladky.anton@gmail.com>,
working on debian packaging.
2011-05-05 00:44:24 +02:00
Jitse Niesen
012419166e Bail out if preprocessor symbol Success is defined (bug #253). 2011-05-04 14:28:45 +01:00
Jitse Niesen
781e75cbd7 Document some more preprocessor symbols:
EIGEN_NO_MALLOC, EIGEN_RUNTIME_NO_MALLOC, eigen_assert.
2011-05-04 14:13:20 +01:00
Jitse Niesen
cc23b0a3d9 Remove unused enums in Constants.h . 2011-05-03 17:20:54 +01:00
Jitse Niesen
a96c849c20 Document enums in Constants.h (bug #248).
To get the links to work, I also had to document the Eigen namespace.
Unfortunately, this means that the word Eigen is linked whenever it appears
in the docs.
2011-05-03 17:08:14 +01:00
Gael Guennebaud
1947da39ab fix bug #258: asin/acos copy paste mistake 2011-05-02 13:26:44 +02:00
Hauke Heibel
10426b7647 Final working fix for the EOL extension.
MSVC debugger tools are now forced to CRLF.
2011-04-30 18:10:17 +02:00
Hauke Heibel
0358a8247c This should fix the eol extension. 2011-04-30 17:46:40 +02:00
Hauke Heibel
9e0c8549ce Fixed Unix script line ending conversions. 2011-04-30 17:35:51 +02:00
Jitse Niesen
06fb7cf470 Implement compound assignments using evaluator of SelfCwiseBinaryOp. 2011-04-28 16:57:35 +01:00
Jitse Niesen
3b60d2dbc4 Implement swap using evaluators. 2011-04-28 15:52:15 +01:00
Jitse Niesen
2d11041e24 Use copyCoeff/copyPacket in copy_using_evaluator. 2011-04-22 22:36:45 +01:00
Jitse Niesen
3457965bf5 Implement evaluator for Diagonal. 2011-04-22 22:36:45 +01:00
Jitse Niesen
f924722f3b Implement evaluators for Reverse. 2011-04-22 22:36:45 +01:00
Jitse Niesen
bb2d70d211 Implement evaluators for ArrayWrapper and MatrixWrapper. 2011-04-22 22:36:45 +01:00
Gael Guennebaud
6441e8727b fix aligned_stack_memory_handler for null pointers 2011-04-21 09:00:55 +02:00
Mathieu Gautier
392eb9fee8 Quaternion : add Flags on Quaternion's traits with the LvalueBit set if needed
Quaternion : change PacketAccess to IsAligned to mimic other traits
test : add a test and 4 failtest on Map<const Quaternion> based on Eigen::Map ones
2011-04-12 14:49:50 +02:00
Gael Guennebaud
f85db18c1c I doubt this change was intended to be committed
ss: Enter commit message.  Lines beginning with 'HG:' are removed.
2011-04-20 08:15:09 +02:00
Thomas Capricelli
50c00d14c8 be nice to the server: don't use -j3 2011-04-19 17:41:59 +02:00
Gael Guennebaud
e87f653924 fix bug #250: compilation error with gcc 4.6 (STL header files no longer include cstddef) 2011-04-19 16:34:25 +02:00
Gael Guennebaud
67d50f539b fix bug #242: vectorization was wrongly enabled on MSVC 2005 2011-04-19 15:25:00 +02:00
Eamon Nerbonne
e48bc0dfe3 WIN32 isn't defined ?? but _WIN32 is. 2011-04-19 14:37:04 +02:00
Jitse Niesen
0b40b36d10 Make MapBase(PointerType) constructor explicit (fixes bug #251) 2011-04-19 12:13:04 +01:00
Benoit Jacob
820545cddb fix unaligned-array-assert link 2011-04-18 06:35:54 -04:00
Jitse Niesen
c9b5531d6c Normalize eigenvectors returned by EigenSolver (fixes bug #249)
because the documentation says that we do this.
Also, add a unit test to cover this.
2011-04-15 17:39:59 +01:00
Jitse Niesen
e654405900 Implement unrolling in copy_using_evaluator() . 2011-04-13 11:49:48 +01:00
Jitse Niesen
7e86324898 Implement evaluator for PartialReduxExpr as a dumb wrapper. 2011-04-13 09:49:10 +01:00
Jitse Niesen
11164830f5 Implement evaluator for Replicate. 2011-04-12 22:54:31 +01:00
Jitse Niesen
12a30a982f Implement evaluator for Select. 2011-04-12 22:34:16 +01:00
Jitse Niesen
88b3116b99 Decouple AssignEvaluator.h from assign_traits from Assign.h 2011-04-12 13:35:08 +01:00
Gael Guennebaud
0c146bee1b enforce no inlining of the GEBP product kernel: this is a big
function that makes no sense to inline, though GCC was thinking
the opposite. This even slightly improves the perf. And as a side
effect this works around a weird GCC-4.4 linking bug (see
"Problem with g++-4.4 -O2 and Eigen3" in the ML)
2011-04-07 18:49:45 +02:00
Jitse Niesen
eae5a6bb09 Decouple Cwise*Op evaluators from expression objects 2011-04-05 18:30:51 +01:00
Jitse Niesen
11ea81858a Implement evaluator for CwiseUnaryView 2011-04-05 18:20:43 +01:00
Jitse Niesen
cca7b146a2 Implement evaluator for Map 2011-04-05 18:15:59 +01:00
Gael Guennebaud
a6b5314c20 Performance tunning for TRMM products 2011-04-05 11:20:50 +02:00
Jitse Niesen
ae06b8af5c Make evaluators for Matrix and Array inherit from common base class.
This gets rid of some code duplication.
2011-04-04 15:35:14 +01:00
Jitse Niesen
afdd26f229 Do some of the actual work in evaluator for Block.
Also, add simple accessor methods to Block expression class.
2011-04-04 13:44:50 +01:00
Gael Guennebaud
0d58c36ffd std::min/max are not implemented and they cannot be implemented easily 2011-04-04 16:26:43 +02:00
Jitse Niesen
70d5837e00 Correct typo in QuickReference doc, plus typographical improvements. 2011-04-01 16:58:51 +01:00
Gael Guennebaud
77a1373c3a fix trmm unit test 2011-03-31 15:32:21 +02:00
Jitse Niesen
d90a8ee8bd Evaluators: add Block evaluator as dumb wrapper, add slice vectorization. 2011-03-31 13:50:52 +01:00
Gael Guennebaud
b471161f28 fix typo and remove unused declaration. 2011-03-31 10:02:02 +02:00
Adam Szalkowski
969e92261d fix bug #239: the essential part was left uninitialized in some cases 2011-03-31 09:54:52 +02:00
Jitse Niesen
10dae8dd4d Add directory containing split_test_helper.h to include path. 2011-03-29 14:17:49 +01:00
Jitse Niesen
8175fe43e0 Evaluators: Make inner vectorization more similar to default traversal. 2011-03-28 21:29:47 +01:00
Gael Guennebaud
00991b5b64 extend trmm/trmv unit test to thoroughly check all configurations 2011-03-28 17:45:16 +02:00
Gael Guennebaud
4f1419e9c3 add the possibility to specify a list of sub-test suffixes in a compact way 2011-03-28 17:43:59 +02:00
Gael Guennebaud
6feb1d3c0b fix trmv for Strictly* triangular matrices and trapezoidal matrices 2011-03-28 17:42:26 +02:00
Gael Guennebaud
568478ffe5 fix trmm for some unusual trapezoidal cases (a dense set of columns or rows is zero) 2011-03-28 17:41:46 +02:00
Gael Guennebaud
f4ac7d2b43 automatically generate the CALL_SUBTEST_* macros 2011-03-28 17:39:05 +02:00
Jitse Niesen
b175bc464f Evaluators: Implement linear traversal, better testing. 2011-03-27 22:08:48 +01:00
Jitse Niesen
1b17a674dd Evaluators: Implement inner vectorization.
The implementation is minimal (I only wrote the functions called by
the unit test) and ugly (lots of copy and pasting).
2011-03-27 13:49:15 +01:00
Jitse Niesen
5c204d1ff7 Evaluators: Implement LinearVectorizedTraversal, packet ops in evaluators. 2011-03-25 16:30:41 +00:00
Gael Guennebaud
e6fa4a267a improve computation of the sub panel width 2011-03-24 23:42:25 +01:00
Gael Guennebaud
931814d7c0 improve performance of trsm 2011-03-24 23:19:53 +01:00
Jitse Niesen
c6ad2deead Bug fix in linspace_op::packetOp(row,col). Fixes bug #232.
Also, add regression test.
2011-03-24 10:42:11 +00:00
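
For illustration, a minimal use of LinSpaced, whose vectorized path goes through linspace_op::packetOp:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0);
      std::cout << v.transpose() << std::endl; // 0 0.25 0.5 0.75 1
      return 0;
    }
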
Gael Guennebaud
42bc1f77be impl basic product evaluator on top of previous one 2011-03-24 09:33:36 +01:00
Gael Guennebaud
abc8c0821c makes evaluator test use VERIFY_IS_APPROX 2011-03-23 17:23:56 +01:00
Gael Guennebaud
4ada45bc76 BTL: add eigen2 backend 2011-03-23 16:59:12 +01:00
Gael Guennebaud
7d24cf283a do not confuse Eigen3 or beta versions of Eigen3 with Eigen2 2011-03-23 16:58:45 +01:00
Gael Guennebaud
7bb4f6ae2f BTL: do not enable GOTO1 if GOTO2 was found 2011-03-23 16:28:43 +01:00
Gael Guennebaud
3ef0da6efb fix tridiagonalization action 2011-03-23 16:28:09 +01:00
Gael Guennebaud
816541d82c add a stupid Product<A,B> expression produced by prod(a,b), and implement a first version of its evaluator 2011-03-23 16:12:21 +01:00
Gael Guennebaud
cfd5c2d74e import evaluator works 2011-03-23 11:54:00 +01:00
Gael Guennebaud
611fc17894 add support for ublas 2011-03-23 11:39:35 +01:00
Gael Guennebaud
ec32d2c807 BTL: by default use current Eigen headers, and disable the novec version 2011-03-23 11:08:10 +01:00
Gael Guennebaud
b3e43246bc BTL: add a Eigen-blas backend 2011-03-23 11:00:31 +01:00
Gael Guennebaud
f9da1ccc3b BTL: clean the BLAS implementation 2011-03-23 10:35:54 +01:00
Gael Guennebaud
e35b1ef3f3 BTL: rm stupid backends 2011-03-23 10:07:24 +01:00
Gael Guennebaud
fe595e91ae update plot settings 2011-03-23 10:03:01 +01:00
Gael Guennebaud
9cca79f5ca update aat action to do a syrk operation, and remove (comment) ata action 2011-03-23 10:02:00 +01:00
Gael Guennebaud
da3f3586e0 BTL: GMM++ LU is not a full pivoting LU 2011-03-22 15:39:23 +01:00
Gael Guennebaud
22c7609d72 extend sparse product unit tests 2011-03-22 11:58:22 +01:00
Gael Guennebaud
5fda8cdfb3 fix bug #228 (ei_aligned_stack_delete does not exist anymore) 2011-03-21 21:59:42 +01:00
Benoit Jacob
eb9c6b6cfd merge 2011-03-21 06:46:27 -04:00
Benoit Jacob
bb8a25e94b fix typos 2011-03-21 06:45:57 -04:00
Gael Guennebaud
535a61ede8 port sparse LLT/LDLT to new stack allocation API 2011-03-20 17:10:43 +01:00
Benoit Jacob
eba023d082 make compile_snippet use Eigen/Dense 2011-03-20 11:48:53 -04:00
Gael Guennebaud
b8ecda5c66 clean a bit the stack allocation mechanism 2011-03-19 10:27:47 +01:00
Gael Guennebaud
bbb4b35dfc test the new stack allocation mechanism 2011-03-19 08:51:38 +01:00
Gael Guennebaud
290205dfc0 fix memory leak when a custom scalar throws an exception 2011-03-19 01:06:50 +01:00
Benoit Jacob
5991d247f9 bump 2011-03-18 05:27:58 -04:00
Gael Guennebaud
37c5341d64 fix compilation for old but not so old versions of glew 2011-03-18 10:26:21 +01:00
Hauke Heibel
50a3cd678a Improved site and buildname generation. 2011-02-20 11:54:07 +01:00
412 changed files with 20117 additions and 12411 deletions

.hgeol

@@ -1,3 +1,8 @@
[patterns]
**.* = native
eigen_autoexp_part.dat = CRLF
[patterns]
scripts/*.in = LF
debug/msvc/*.dat = CRLF
unsupported/test/mpreal/*.* = CRLF
** = native
[repository]
native = LF

CMakeLists.txt

@@ -64,6 +64,10 @@ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
find_package(StandardMathLibrary)
set(EIGEN_TEST_CUSTOM_LINKER_FLAGS "" CACHE STRING "Additional linker flags when linking unit tests.")
set(EIGEN_TEST_CUSTOM_CXX_FLAGS "" CACHE STRING "Additional compiler flags when compiling unit tests.")
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "")
if(NOT STANDARD_MATH_LIBRARY_FOUND)
@@ -103,6 +107,8 @@ endif()
add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
@@ -301,44 +307,9 @@ add_subdirectory(Eigen)
add_subdirectory(doc EXCLUDE_FROM_ALL)
add_custom_target(buildtests)
add_custom_target(check COMMAND "ctest")
add_dependencies(check buildtests)
# CMake/Ctest does not allow us to change the build command,
# so we have to workaround by directly editing the generated DartConfiguration.tcl file
# save CMAKE_MAKE_PROGRAM
set(CMAKE_MAKE_PROGRAM_SAVE ${CMAKE_MAKE_PROGRAM})
# and set a fake one
set(CMAKE_MAKE_PROGRAM "@EIGEN_MAKECOMMAND_PLACEHOLDER@")
include(CTest)
include(EigenConfigureTesting)
# fixme, not sure this line is still needed:
enable_testing() # must be called from the root CMakeLists, see man page
include(EigenTesting)
ei_init_testing()
# overwrite default DartConfiguration.tcl
# The worarounds are different for each version of the MSVC IDE
if(MSVC_IDE)
if(MSVC_VERSION EQUAL 1600) # MSVC 2010
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests.vcxproj /p:Configuration=\${CTEST_CONFIGURATION_TYPE} \n # ")
else() # MSVC 2008 (TODO check MSVC 2005)
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} /project buildtests")
endif()
else()
# for make and nmake
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests")
endif()
configure_file(${CMAKE_BINARY_DIR}/DartConfiguration.tcl ${CMAKE_BINARY_DIR}/DartConfiguration.tcl)
# restore default CMAKE_MAKE_PROGRAM
set(CMAKE_MAKE_PROGRAM ${CMAKE_MAKE_PROGRAM_SAVE})
# un-set temporary variables so that it is like they never existed.
# CMake 2.6.3 introduces the more logical unset() syntax for this.
set(CMAKE_MAKE_PROGRAM_SAVE)
set(EIGEN_MAKECOMMAND_PLACEHOLDER)
configure_file(${CMAKE_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
@@ -347,15 +318,13 @@ else()
add_subdirectory(test EXCLUDE_FROM_ALL)
endif()
if(NOT MSVC)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(blas)
add_subdirectory(lapack)
else()
add_subdirectory(blas EXCLUDE_FROM_ALL)
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
endif(NOT MSVC)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(blas)
add_subdirectory(lapack)
else()
add_subdirectory(blas EXCLUDE_FROM_ALL)
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
add_subdirectory(unsupported)

COPYING.BSD Normal file

@@ -0,0 +1,26 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


@@ -24,6 +24,9 @@ namespace Eigen {
#include "src/misc/Solve.h"
#include "src/Cholesky/LLT.h"
#include "src/Cholesky/LDLT.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/Cholesky/LLT_MKL.h"
#endif
} // namespace Eigen

Eigen/CholmodSupport Normal file

@@ -0,0 +1,34 @@
#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
#define EIGEN_CHOLMODSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
extern "C" {
#include <cholmod.h>
}
namespace Eigen {
/** \ingroup Support_modules
* \defgroup CholmodSupport_Module CholmodSupport module
*
*
* \code
* #include <Eigen/CholmodSupport>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/CholmodSupport/CholmodSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_CHOLMODSUPPORT_MODULE_H


@@ -34,6 +34,10 @@
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
#include "src/Core/util/Macros.h"
// this include file manages BLAS and MKL related macros
// and inclusion of their respective header files
#include "src/Core/util/MKL_support.h"
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check; it takes into
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
#if !EIGEN_ALIGN
@@ -175,9 +179,6 @@
#include <new>
#endif
// defined in bits/termios.h
#undef B0
/** \brief Namespace containing all symbols from the %Eigen library. */
namespace Eigen {
@@ -247,6 +248,10 @@ using std::ptrdiff_t;
* \endcode
*/
/** \defgroup Support_modules Support modules [category]
* Category of modules which add support for external libraries.
*/
#include "src/Core/util/Constants.h"
#include "src/Core/util/ForwardDeclarations.h"
#include "src/Core/util/Meta.h"
@@ -318,7 +323,7 @@ using std::ptrdiff_t;
#include "src/Core/CommaInitializer.h"
#include "src/Core/Flagged.h"
#include "src/Core/ProductBase.h"
#include "src/Core/Product.h"
#include "src/Core/GeneralProduct.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/SolveTriangular.h"
@@ -347,6 +352,21 @@ using std::ptrdiff_t;
#include "src/Core/ArrayBase.h"
#include "src/Core/ArrayWrapper.h"
#ifdef EIGEN_USE_BLAS
#include "src/Core/products/GeneralMatrixMatrix_MKL.h"
#include "src/Core/products/GeneralMatrixVector_MKL.h"
#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h"
#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h"
#include "src/Core/products/SelfadjointMatrixVector_MKL.h"
#include "src/Core/products/TriangularMatrixMatrix_MKL.h"
#include "src/Core/products/TriangularMatrixVector_MKL.h"
#include "src/Core/products/TriangularSolverMatrix_MKL.h"
#endif // EIGEN_USE_BLAS
#ifdef EIGEN_USE_MKL_VML
#include "src/Core/Assign_MKL.h"
#endif
} // namespace Eigen
#include "src/Core/GlobalFunctions.h"


@@ -33,7 +33,8 @@
namespace Eigen {
/** \defgroup Eigen2Support_Module Eigen2 support module
/** \ingroup Support_modules
* \defgroup Eigen2Support_Module Eigen2 support module
* This module provides a couple of deprecated functions improving the compatibility with Eigen2.
*
* To use it, define EIGEN2_SUPPORT before including any Eigen header
@@ -63,6 +64,24 @@ namespace Eigen {
// Eigen2 used to include iostream
#include<iostream>
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
using Eigen::Vector##SizeSuffix##TypeSuffix; \
using Eigen::RowVector##SizeSuffix##TypeSuffix;
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
#define EIGEN_USING_MATRIX_TYPEDEFS \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
#define USING_PART_OF_NAMESPACE_EIGEN \
EIGEN_USING_MATRIX_TYPEDEFS \
using Eigen::Matrix; \

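For illustration, a minimal sketch of how these compatibility macros are consumed by Eigen2-era client code (EIGEN2_SUPPORT must be defined before including any Eigen header):

#define EIGEN2_SUPPORT
#include <Eigen/Core>
USING_PART_OF_NAMESPACE_EIGEN // brings Matrix, Matrix3f, Vector4d, ... into scope
int main()
{
  Matrix3f m = Matrix3f::Identity(); // no Eigen:: qualification needed
  Vector4d v = Vector4d::Zero();
  return (m.rows() == 3 && v.size() == 4) ? 0 : 1;
}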

@@ -9,6 +9,7 @@
#include "Jacobi"
#include "Householder"
#include "LU"
#include "Geometry"
namespace Eigen {
@@ -35,6 +36,11 @@ namespace Eigen {
#include "src/Eigenvalues/ComplexSchur.h"
#include "src/Eigenvalues/ComplexEigenSolver.h"
#include "src/Eigenvalues/MatrixBaseEigenvalues.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/Eigenvalues/RealSchur_MKL.h"
#include "src/Eigenvalues/ComplexSchur_MKL.h"
#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h"
#endif
} // namespace Eigen


@@ -0,0 +1,37 @@
#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module
*
* This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a square matrix, usually very large and sparse.
* Those solvers are accessible via the following classes:
* - ConjugateGradient for selfadjoint (hermitian) matrices,
* - BiCGSTAB for general square matrices.
*
* Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
*
* \code
* #include <Eigen/IterativeLinearSolvers>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/IterativeLinearSolvers/IterativeSolverBase.h"
#include "src/IterativeLinearSolvers/BasicPreconditioners.h"
#include "src/IterativeLinearSolvers/ConjugateGradient.h"
#include "src/IterativeLinearSolvers/BiCGSTAB.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
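A minimal solve sketch with the solvers listed above (ConjugateGradient shown; BiCGSTAB is used the same way for general square matrices):

#include <Eigen/IterativeLinearSolvers>
using namespace Eigen;
int main()
{
  SparseMatrix<double> A(2, 2);        // a tiny selfadjoint positive definite example
  A.insert(0, 0) = 4; A.insert(1, 1) = 3;
  A.makeCompressed();
  VectorXd b(2); b << 1, 2;
  ConjugateGradient<SparseMatrix<double> > cg;
  cg.compute(A);                       // setup, including the default preconditioner
  VectorXd x = cg.solve(b);            // iterative solve of A x = b
  return cg.info() == Success ? 0 : 1;
}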


@@ -23,6 +23,9 @@ namespace Eigen {
#include "src/misc/Image.h"
#include "src/LU/FullPivLU.h"
#include "src/LU/PartialPivLU.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/LU/PartialPivLU_MKL.h"
#endif
#include "src/LU/Determinant.h"
#include "src/LU/Inverse.h"

Eigen/OrderingMethods Normal file

@@ -0,0 +1,27 @@
#ifndef EIGEN_ORDERINGMETHODS_MODULE_H
#define EIGEN_ORDERINGMETHODS_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup OrderingMethods_Module OrderingMethods module
*
* This module is currently for internal use only.
*
*
* \code
* #include <Eigen/OrderingMethods>
* \endcode
*/
#include "src/OrderingMethods/Amd.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_ORDERINGMETHODS_MODULE_H

Eigen/PARDISOSupport Normal file

@@ -0,0 +1,30 @@
#ifndef EIGEN_PARDISOSUPPORT_MODULE_H
#define EIGEN_PARDISOSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
#include <mkl_pardiso.h>
#include <unsupported/Eigen/SparseExtra>
namespace Eigen {
/** \ingroup Support_modules
* \defgroup PARDISOSupport_Module PARDISOSupport module
*
* This module brings support for the Intel(R) MKL PARDISO direct sparse solvers
*
* \code
* #include <Eigen/PARDISOSupport>
* \endcode
*/
#include "src/PARDISOSupport/PARDISOSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_PARDISOSUPPORT_MODULE_H
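When MKL is available, the solver is used like the other direct sparse decompositions; a sketch, assuming the PardisoLU wrapper declared in src/PARDISOSupport/PARDISOSupport.h:

#include <Eigen/PARDISOSupport>
using namespace Eigen;
// A: square sparse matrix, b: matching right-hand side, both filled elsewhere.
VectorXd solveWithPardiso(const SparseMatrix<double>& A, const VectorXd& b)
{
  PardisoLU<SparseMatrix<double> > lu(A); // factorization runs inside MKL PARDISO
  return lu.solve(b);
}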


@@ -28,6 +28,10 @@ namespace Eigen {
#include "src/QR/HouseholderQR.h"
#include "src/QR/FullPivHouseholderQR.h"
#include "src/QR/ColPivHouseholderQR.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/QR/HouseholderQR_MKL.h"
#include "src/QR/ColPivHouseholderQR_MKL.h"
#endif
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/QR.h"


@@ -13,9 +13,9 @@ namespace Eigen {
*
*
*
* This module provides SVD decomposition for (currently) real matrices.
* This module provides SVD decomposition for matrices (both real and complex).
* This decomposition is accessible via the following MatrixBase method:
* - MatrixBase::svd()
* - MatrixBase::jacobiSvd()
*
* \code
* #include <Eigen/SVD>
@@ -24,6 +24,9 @@ namespace Eigen {
#include "src/misc/Solve.h"
#include "src/SVD/JacobiSVD.h"
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
#include "src/SVD/JacobiSVD_MKL.h"
#endif
#include "src/SVD/UpperBidiagonalization.h"
#ifdef EIGEN2_SUPPORT

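A short usage sketch of JacobiSVD (the same code compiles unchanged for complex scalar types):

#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;
int main()
{
  MatrixXd A = MatrixXd::Random(4, 3);
  JacobiSVD<MatrixXd> svd(A, ComputeThinU | ComputeThinV);
  std::cout << "singular values: " << svd.singularValues().transpose() << "\n";
  VectorXd b = VectorXd::Random(4);
  VectorXd x = svd.solve(b);           // least-squares solution of A x = b
  return 0;
}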

@@ -1,69 +1,27 @@
#ifndef EIGEN_SPARSE_MODULE_H
#define EIGEN_SPARSE_MODULE_H
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include <vector>
#include <map>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#ifdef EIGEN2_SUPPORT
#define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
#endif
#ifndef EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
#error The sparse module API is not stable yet. To use it anyway, please define the EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET preprocessor token.
#endif
namespace Eigen {
/** \defgroup Sparse_Module Sparse module
/** \defgroup Sparse_modules Sparse modules
*
*
*
* See the \ref TutorialSparse "Sparse tutorial"
* Meta-module including all related modules:
* - SparseCore
* - OrderingMethods
* - SparseCholesky
* - IterativeLinearSolvers
*
* \code
* #include <Eigen/Sparse>
* \endcode
*/
/** The type used to identify a general sparse storage. */
struct Sparse {};
#include "src/Sparse/SparseUtil.h"
#include "src/Sparse/SparseMatrixBase.h"
#include "src/Sparse/CompressedStorage.h"
#include "src/Sparse/AmbiVector.h"
#include "src/Sparse/SparseMatrix.h"
#include "src/Sparse/DynamicSparseMatrix.h"
#include "src/Sparse/MappedSparseMatrix.h"
#include "src/Sparse/SparseVector.h"
#include "src/Sparse/CoreIterators.h"
#include "src/Sparse/SparseBlock.h"
#include "src/Sparse/SparseTranspose.h"
#include "src/Sparse/SparseCwiseUnaryOp.h"
#include "src/Sparse/SparseCwiseBinaryOp.h"
#include "src/Sparse/SparseDot.h"
#include "src/Sparse/SparseAssign.h"
#include "src/Sparse/SparseRedux.h"
#include "src/Sparse/SparseFuzzy.h"
#include "src/Sparse/SparseProduct.h"
#include "src/Sparse/SparseSparseProduct.h"
#include "src/Sparse/SparseDenseProduct.h"
#include "src/Sparse/SparseDiagonalProduct.h"
#include "src/Sparse/SparseTriangularView.h"
#include "src/Sparse/SparseSelfAdjointView.h"
#include "src/Sparse/TriangularSolver.h"
#include "src/Sparse/SparseView.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#include "SparseCore"
#include "OrderingMethods"
#include "SparseCholesky"
#include "IterativeLinearSolvers"
#endif // EIGEN_SPARSE_MODULE_H

Eigen/SparseCholesky Normal file

@@ -0,0 +1,34 @@
#ifndef EIGEN_SPARSECHOLESKY_MODULE_H
#define EIGEN_SPARSECHOLESKY_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup SparseCholesky_Module SparseCholesky module
*
* This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices.
* Those decompositions are accessible via the following classes:
* - SimplicialLLt,
* - SimplicialLDLt
*
* Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module.
*
* \code
* #include <Eigen/SparseCholesky>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/SparseCholesky/SimplicialCholesky.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSECHOLESKY_MODULE_H
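A minimal solve sketch with the class names listed above:

#include <Eigen/SparseCholesky>
using namespace Eigen;
// Solve A x = b where A is sparse, selfadjoint, and positive (semi)definite.
VectorXd solveSPD(const SparseMatrix<double>& A, const VectorXd& b)
{
  SimplicialLDLt<SparseMatrix<double> > ldlt(A); // analyze the pattern, then factorize
  return ldlt.solve(b);
}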

Eigen/SparseCore Normal file

@@ -0,0 +1,65 @@
#ifndef EIGEN_SPARSECORE_MODULE_H
#define EIGEN_SPARSECORE_MODULE_H
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include <vector>
#include <map>
#include <cstdlib>
#include <cstring>
#include <algorithm>
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup SparseCore_Module SparseCore module
*
* This module provides a sparse matrix representation, and the associated basic matrix manipulations
* and operations.
*
* See the \ref TutorialSparse "Sparse tutorial"
*
* \code
* #include <Eigen/SparseCore>
* \endcode
*
* This module depends on: Core.
*/
/** The type used to identify a general sparse storage. */
struct Sparse {};
#include "src/SparseCore/SparseUtil.h"
#include "src/SparseCore/SparseMatrixBase.h"
#include "src/SparseCore/CompressedStorage.h"
#include "src/SparseCore/AmbiVector.h"
#include "src/SparseCore/SparseMatrix.h"
#include "src/SparseCore/MappedSparseMatrix.h"
#include "src/SparseCore/SparseVector.h"
#include "src/SparseCore/CoreIterators.h"
#include "src/SparseCore/SparseBlock.h"
#include "src/SparseCore/SparseTranspose.h"
#include "src/SparseCore/SparseCwiseUnaryOp.h"
#include "src/SparseCore/SparseCwiseBinaryOp.h"
#include "src/SparseCore/SparseDot.h"
#include "src/SparseCore/SparseAssign.h"
#include "src/SparseCore/SparseRedux.h"
#include "src/SparseCore/SparseFuzzy.h"
#include "src/SparseCore/ConservativeSparseSparseProduct.h"
#include "src/SparseCore/SparseSparseProductWithPruning.h"
#include "src/SparseCore/SparseProduct.h"
#include "src/SparseCore/SparseDenseProduct.h"
#include "src/SparseCore/SparseDiagonalProduct.h"
#include "src/SparseCore/SparseTriangularView.h"
#include "src/SparseCore/SparseSelfAdjointView.h"
#include "src/SparseCore/TriangularSolver.h"
#include "src/SparseCore/SparseView.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSECORE_MODULE_H
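For example, the typical way to fill a SparseMatrix is to collect (row, column, value) triplets and convert them in one pass; a sketch, assuming the Triplet helper and the setFromTriplets() member available in this version:

#include <Eigen/SparseCore>
#include <vector>
using namespace Eigen;
int main()
{
  std::vector<Triplet<double> > entries;
  entries.push_back(Triplet<double>(0, 0, 4.0)); // (row, col, value)
  entries.push_back(Triplet<double>(1, 1, 3.0));
  entries.push_back(Triplet<double>(0, 1, 1.0));
  SparseMatrix<double> A(2, 2);
  A.setFromTriplets(entries.begin(), entries.end()); // duplicates would be summed
  return A.nonZeros() == 3 ? 0 : 1;
}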

Eigen/SuperLUSupport Normal file

@@ -0,0 +1,53 @@
#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H
#define EIGEN_SUPERLUSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
#ifdef EMPTY
#define EIGEN_EMPTY_WAS_ALREADY_DEFINED
#endif
typedef int int_t;
#include <slu_Cnames.h>
#include <supermatrix.h>
#include <slu_util.h>
// slu_util.h defines a preprocessor token named EMPTY which is really polluting,
// so we remove it in favor of a SUPERLU_EMPTY token.
// If EMPTY was already defined, then we don't undef it.
#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED)
# undef EIGEN_EMPTY_WAS_ALREADY_DEFINED
#elif defined(EMPTY)
# undef EMPTY
#endif
#define SUPERLU_EMPTY (-1)
namespace Eigen { struct SluMatrix; }
namespace Eigen {
/** \ingroup Support_modules
* \defgroup SuperLUSupport_Module SuperLUSupport module
*
* \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY, which is no longer defined because it is too polluting.
*
* \code
* #include <Eigen/SuperLUSupport>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/SuperLUSupport/SuperLUSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SUPERLUSUPPORT_MODULE_H
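Usage mirrors the other direct solvers; a sketch, assuming the SuperLU<> wrapper defined in src/SuperLUSupport/SuperLUSupport.h:

#include <Eigen/SuperLUSupport>
using namespace Eigen;
VectorXd solveWithSuperLU(const SparseMatrix<double>& A, const VectorXd& b)
{
  SuperLU<SparseMatrix<double> > slu(A); // sparse LU via the SuperLU library
  return slu.solve(b);
}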

Eigen/UmfPackSupport Normal file

@@ -0,0 +1,34 @@
#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H
#define EIGEN_UMFPACKSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
extern "C" {
#include <umfpack.h>
}
namespace Eigen {
/** \ingroup Support_modules
* \defgroup UmfPackSupport_Module UmfPackSupport module
*
*
*
*
* \code
* #include <Eigen/UmfPackSupport>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/UmfPackSupport/UmfPackSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_UMFPACKSUPPORT_MODULE_H
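And the matching sketch for the UmfPack-backed LU (class name UmfPackLU assumed from src/UmfPackSupport/UmfPackSupport.h):

#include <Eigen/UmfPackSupport>
using namespace Eigen;
VectorXd solveWithUmfPack(const SparseMatrix<double>& A, const VectorXd& b)
{
  UmfPackLU<SparseMatrix<double> > lu(A); // multifrontal sparse LU
  return lu.solve(b);
}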


@@ -1,9 +1,10 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009 Keir Mierle <mierle@gmail.com>
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011 Timothy E. Holy <tim.holy@gmail.com >
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -31,13 +32,15 @@ namespace internal {
template<typename MatrixType, int UpLo> struct LDLT_Traits;
}
/** \ingroup cholesky_Module
/** \ingroup Cholesky_Module
*
* \class LDLT
*
* \brief Robust Cholesky decomposition of a matrix with pivoting
*
* \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
* \param UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
* The other triangular part won't be read.
*
* Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
* matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L
@@ -48,14 +51,10 @@ template<typename MatrixType, int UpLo> struct LDLT_Traits;
* on D also stabilizes the computation.
*
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
* decomposition to determine whether a system of equations has a solution.
* decomposition to determine whether a system of equations has a solution.
*
* \sa MatrixBase::ldlt(), class LLT
*/
/* THIS PART OF THE DOX IS CURRENTLY DISABLED BECAUSE IT IS INACCURATE, DUE TO A BUG IN THE DECOMPOSITION CODE
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
* the strict lower part does not have to store correct values.
*/
template<typename _MatrixType, int _UpLo> class LDLT
{
public:
@@ -98,6 +97,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
m_isInitialized(false)
{}
/** \brief Constructor with decomposition
*
* This calculates the decomposition for the input \a matrix.
* \sa LDLT(Index size)
*/
LDLT(const MatrixType& matrix)
: m_matrix(matrix.rows(), matrix.cols()),
m_transpositions(matrix.rows()),
@@ -107,6 +111,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
compute(matrix);
}
/** Clear any existing decomposition
* \sa rankUpdate(w,sigma)
*/
void setZero()
{
m_isInitialized = false;
}
/** \returns a view of the upper triangular matrix U */
inline typename Traits::MatrixU matrixU() const
{
@@ -130,14 +142,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
}
/** \returns the coefficients of the diagonal matrix D */
inline Diagonal<const MatrixType> vectorD(void) const
inline Diagonal<const MatrixType> vectorD() const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_matrix.diagonal();
}
/** \returns true if the matrix is positive (semidefinite) */
inline bool isPositive(void) const
inline bool isPositive() const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_sign == 1;
@@ -196,6 +208,9 @@ template<typename _MatrixType, int _UpLo> class LDLT
LDLT& compute(const MatrixType& matrix);
template <typename Derived>
LDLT& rankUpdate(const MatrixBase<Derived>& w,RealScalar alpha=1);
/** \returns the internal LDLT decomposition matrix
*
* TODO: document the storage layout
@@ -211,6 +226,17 @@ template<typename _MatrixType, int _UpLo> class LDLT
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
/** \brief Reports whether previous computation was successful.
*
* \returns \c Success if computation was successful,
* \c NumericalIssue if the matrix appears to be negative.
*/
ComputationInfo info() const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return Success;
}
protected:
/** \internal
@@ -249,7 +275,7 @@ template<> struct ldlt_inplace<Lower>
return true;
}
RealScalar cutoff = 0, biggest_in_corner;
RealScalar cutoff(0), biggest_in_corner;
for (Index k = 0; k < size; ++k)
{
@@ -317,6 +343,61 @@ template<> struct ldlt_inplace<Lower>
return true;
}
// Reference for the algorithm: Davis and Hager, "Multiple Rank
// Modifications of a Sparse Cholesky Factorization" (Algorithm 1)
// Trivial rearrangements of their computations (Timothy E. Holy)
// allow their algorithm to work for rank-1 updates even if the
// original matrix is not of full rank.
// Here only rank-1 updates are implemented, to reduce the
// requirement for intermediate storage and improve accuracy
template<typename MatrixType, typename WDerived>
static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, typename MatrixType::RealScalar sigma=1)
{
using internal::isfinite;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
const Index size = mat.rows();
eigen_assert(mat.cols() == size && w.size()==size);
RealScalar alpha = 1;
// Apply the update
for (Index j = 0; j < size; j++)
{
// Check for termination due to an original decomposition of low rank
if (!isfinite(alpha))
break;
// Update the diagonal terms
RealScalar dj = real(mat.coeff(j,j));
Scalar wj = w.coeff(j);
RealScalar swj2 = sigma*abs2(wj);
RealScalar gamma = dj*alpha + swj2;
mat.coeffRef(j,j) += swj2/alpha;
alpha += swj2/dj;
// Update the terms of L
Index rs = size-j-1;
w.tail(rs) -= wj * mat.col(j).tail(rs);
if(gamma != 0)
mat.col(j).tail(rs) += (sigma*conj(wj)/gamma)*w.tail(rs);
}
return true;
}
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, typename MatrixType::RealScalar sigma=1)
{
// Apply the permutation to the input w
tmp = transpositions * w;
return ldlt_inplace<Lower>::updateInPlace(mat,tmp,sigma);
}
};
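// Written out, the update loop above implements the following recurrence
// (a reading of the code; \f$ \alpha_1 = 1 \f$, \f$ d_j \f$ is the j-th diagonal
// entry of D, \f$ \ell_j \f$ the j-th column of L, and
// \f$ \gamma_j = d_j \alpha_j + \sigma |w_j|^2 \f$):
//   \f$ d_j' = d_j + \sigma |w_j|^2 / \alpha_j \f$,
//   \f$ \alpha_{j+1} = \alpha_j + \sigma |w_j|^2 / d_j \f$,
//   \f$ w \leftarrow w - w_j \ell_j \f$ (using the old column \f$ \ell_j \f$),
//   \f$ \ell_j' = \ell_j + (\sigma \bar{w}_j / \gamma_j) \, w \f$.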
template<> struct ldlt_inplace<Upper>
@@ -327,22 +408,29 @@ template<> struct ldlt_inplace<Upper>
Transpose<MatrixType> matt(mat);
return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
}
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, typename MatrixType::RealScalar sigma=1)
{
Transpose<MatrixType> matt(mat);
return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);
}
};
template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
{
typedef TriangularView<MatrixType, UnitLower> MatrixL;
typedef TriangularView<typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m; }
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
typedef TriangularView<const MatrixType, UnitLower> MatrixL;
typedef TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m; }
static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
};
template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
{
typedef TriangularView<typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
typedef TriangularView<MatrixType, UnitUpper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
inline static MatrixU getU(const MatrixType& m) { return m; }
typedef TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
typedef TriangularView<const MatrixType, UnitUpper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
static inline MatrixU getU(const MatrixType& m) { return m; }
};
} // end namespace internal
@@ -367,6 +455,37 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
return *this;
}
/** Update the LDLT decomposition: given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T.
* \param w a vector to be incorporated into the decomposition.
* \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1.
* \sa setZero()
*/
template<typename MatrixType, int _UpLo>
template<typename Derived>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w,typename NumTraits<typename MatrixType::Scalar>::Real sigma)
{
const Index size = w.rows();
if (m_isInitialized)
{
eigen_assert(m_matrix.rows()==size);
}
else
{
m_matrix.resize(size,size);
m_matrix.setZero();
m_transpositions.resize(size);
for (Index i = 0; i < size; i++)
m_transpositions.coeffRef(i) = i;
m_temporary.resize(size);
m_sign = sigma;
m_isInitialized = true;
}
internal::ldlt_inplace<UpLo>::update(m_matrix, m_transpositions, m_temporary, w, sigma);
return *this;
}
namespace internal {
template<typename _MatrixType, int _UpLo, typename Rhs>
struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>

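A usage sketch of the decomposition together with the new rank-update API (dense types; a hypothetical toy program, sigma = -1 would perform a downdate):

#include <Eigen/Dense>
using namespace Eigen;
int main()
{
  MatrixXd A = MatrixXd::Random(4, 4);
  A = A * A.transpose();               // symmetric positive semidefinite
  VectorXd b = VectorXd::Random(4), w = VectorXd::Random(4);
  LDLT<MatrixXd> ldlt(A);              // factors A = P^T L D L^* P
  VectorXd x = ldlt.solve(b);
  ldlt.rankUpdate(w);                  // now factors A + w w^T (sigma defaults to +1)
  x = ldlt.solve(b);
  return ldlt.info() == Success ? 0 : 1;
}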

@@ -29,13 +29,15 @@ namespace internal{
template<typename MatrixType, int UpLo> struct LLT_Traits;
}
/** \ingroup cholesky_Module
/** \ingroup Cholesky_Module
*
* \class LLT
*
* \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
*
* \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
* \param UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
* The other triangular part won't be read.
*
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
* matrix A such that A = LL^* = U^*U, where L is lower triangular.
@@ -49,6 +51,9 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
* use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations
* has a solution.
*
* Example: \include LLT_example.cpp
* Output: \verbinclude LLT_example.out
*
* \sa MatrixBase::llt(), class LDLT
*/
/* HEY THIS DOX IS DISABLED BECAUSE THERE'S A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
@@ -178,6 +183,9 @@ template<typename _MatrixType, int _UpLo> class LLT
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
template<typename VectorType>
LLT& rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
protected:
/** \internal
* Used to compute and store L
@@ -190,16 +198,15 @@ template<typename _MatrixType, int _UpLo> class LLT
namespace internal {
template<int UpLo> struct llt_inplace;
template<typename Scalar, int UpLo> struct llt_inplace;
template<> struct llt_inplace<Lower>
template<typename Scalar> struct llt_inplace<Scalar, Lower>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename MatrixType>
static typename MatrixType::Index unblocked(MatrixType& mat)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
eigen_assert(mat.rows()==mat.cols());
const Index size = mat.rows();
@@ -254,55 +261,133 @@ template<> struct llt_inplace<Lower>
}
return -1;
}
template<typename MatrixType, typename VectorType>
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::ColXpr ColXpr;
typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
typedef Matrix<Scalar,Dynamic,1> TempVectorType;
typedef typename TempVectorType::SegmentReturnType TempVecSegment;
int n = mat.cols();
eigen_assert(mat.rows()==n && vec.size()==n);
TempVectorType temp;
if(sigma>0)
{
// This version is based on Givens rotations.
// It is faster than the other one below, but only works for updates,
// i.e., for sigma > 0
temp = sqrt(sigma) * vec;
for(int i=0; i<n; ++i)
{
JacobiRotation<Scalar> g;
g.makeGivens(mat(i,i), -temp(i), &mat(i,i));
int rs = n-i-1;
if(rs>0)
{
ColXprSegment x(mat.col(i).tail(rs));
TempVecSegment y(temp.tail(rs));
apply_rotation_in_the_plane(x, y, g);
}
}
}
else
{
temp = vec;
RealScalar beta = 1;
for(int j=0; j<n; ++j)
{
RealScalar Ljj = real(mat.coeff(j,j));
RealScalar dj = abs2(Ljj);
Scalar wj = temp.coeff(j);
RealScalar swj2 = sigma*abs2(wj);
RealScalar gamma = dj*beta + swj2;
RealScalar x = dj + swj2/beta;
if (x<=RealScalar(0))
return j;
RealScalar nLjj = sqrt(x);
mat.coeffRef(j,j) = nLjj;
beta += swj2/dj;
// Update the terms of L
Index rs = n-j-1;
if(rs)
{
temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);
if(gamma != 0)
mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*conj(wj)/gamma)*temp.tail(rs);
}
}
}
return -1;
}
};
template<> struct llt_inplace<Upper>
template<typename Scalar> struct llt_inplace<Scalar, Upper>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename MatrixType>
static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat)
{
Transpose<MatrixType> matt(mat);
return llt_inplace<Lower>::unblocked(matt);
return llt_inplace<Scalar, Lower>::unblocked(matt);
}
template<typename MatrixType>
static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat)
{
Transpose<MatrixType> matt(mat);
return llt_inplace<Lower>::blocked(matt);
return llt_inplace<Scalar, Lower>::blocked(matt);
}
template<typename MatrixType, typename VectorType>
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
{
Transpose<MatrixType> matt(mat);
return llt_inplace<Scalar, Lower>::rankUpdate(matt, vec.conjugate(), sigma);
}
};
template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
{
typedef TriangularView<MatrixType, Lower> MatrixL;
typedef TriangularView<typename MatrixType::AdjointReturnType, Upper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m; }
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
typedef TriangularView<const MatrixType, Lower> MatrixL;
typedef TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m; }
static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
static bool inplace_decomposition(MatrixType& m)
{ return llt_inplace<Lower>::blocked(m)==-1; }
{ return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }
};
template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
{
typedef TriangularView<typename MatrixType::AdjointReturnType, Lower> MatrixL;
typedef TriangularView<MatrixType, Upper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
inline static MatrixU getU(const MatrixType& m) { return m; }
typedef TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
typedef TriangularView<const MatrixType, Upper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
static inline MatrixU getU(const MatrixType& m) { return m; }
static bool inplace_decomposition(MatrixType& m)
{ return llt_inplace<Upper>::blocked(m)==-1; }
{ return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }
};
} // end namespace internal
/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
*
*
* \returns a reference to *this
*
* Example: \include TutorialLinAlgComputeTwice.cpp
* Output: \verbinclude TutorialLinAlgComputeTwice.out
*/
template<typename MatrixType, int _UpLo>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
assert(a.rows()==a.cols());
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);
m_matrix = a;
@@ -314,6 +399,26 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
return *this;
}
/** Performs a rank one update (or downdate) of the current decomposition.
* If A = LL^* before the rank one update,
* then after it we have LL^* = A + sigma * v v^* where \a v must be a vector
* of the same dimension.
*/
template<typename MatrixType, int _UpLo>
template<typename VectorType>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType);
eigen_assert(v.size()==m_matrix.cols());
eigen_assert(m_isInitialized);
if(internal::llt_inplace<typename MatrixType::Scalar, UpLo>::rankUpdate(m_matrix,v,sigma)>=0)
m_info = NumericalIssue;
else
m_info = Success;
return *this;
}
namespace internal {
template<typename _MatrixType, int UpLo, typename Rhs>
struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
@@ -384,3 +489,4 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
}
#endif // EIGEN_LLT_H
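A matching sketch for the LLT rank-update path (positive definite input required):

#include <Eigen/Dense>
using namespace Eigen;
int main()
{
  MatrixXd A = MatrixXd::Random(5, 5);
  A = A * A.transpose() + 5.0 * MatrixXd::Identity(5, 5); // positive definite
  LLT<MatrixXd> llt(A);                // factors A = L L^*
  VectorXd v = VectorXd::Random(5);
  llt.rankUpdate(v, 1);                // factors A + v v^* without refactoring from scratch
  VectorXd b = VectorXd::Random(5);
  VectorXd x = llt.solve(b);
  return llt.info() == Success ? 0 : 1;
}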


@@ -0,0 +1,123 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* LLt decomposition based on LAPACKE_?potrf function.
********************************************************************************
*/
#ifndef EIGEN_LLT_MKL_H
#define EIGEN_LLT_MKL_H
#include "Eigen/src/Core/util/MKL_support.h"
#include <iostream>
namespace internal {
template<typename Scalar> struct mkl_llt;
#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \
template<> struct mkl_llt<EIGTYPE> \
{ \
template<typename MatrixType> \
static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \
{ \
lapack_int matrix_order; \
lapack_int size, lda, info, StorageOrder; \
EIGTYPE* a; \
eigen_assert(m.rows()==m.cols()); \
/* Set up parameters for ?potrf */ \
size = m.rows(); \
StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
a = &(m.coeffRef(0,0)); \
lda = m.outerStride(); \
\
info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
info = (info==0) ? Success : NumericalIssue; \
return info; \
} \
}; \
template<> struct llt_inplace<EIGTYPE, Lower> \
{ \
template<typename MatrixType> \
static typename MatrixType::Index blocked(MatrixType& m) \
{ \
return mkl_llt<EIGTYPE>::potrf(m, 'L'); \
} \
template<typename MatrixType, typename VectorType> \
static void rankUpdate(MatrixType& mat, const VectorType& vec) \
{ \
typedef typename MatrixType::ColXpr ColXpr; \
typedef typename internal::remove_all<ColXpr>::type ColXprCleaned; \
typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; \
typedef typename MatrixType::Scalar Scalar; \
typedef Matrix<Scalar,Dynamic,1> TempVectorType; \
typedef typename TempVectorType::SegmentReturnType TempVecSegment; \
\
int n = mat.cols(); \
eigen_assert(mat.rows()==n && vec.size()==n); \
TempVectorType temp(vec); \
\
for(int i=0; i<n; ++i) \
{ \
JacobiRotation<Scalar> g; \
g.makeGivens(mat(i,i), -temp(i), &mat(i,i)); \
\
int rs = n-i-1; \
if(rs>0) \
{ \
ColXprSegment x(mat.col(i).tail(rs)); \
TempVecSegment y(temp.tail(rs)); \
apply_rotation_in_the_plane(x, y, g); \
} \
} \
} \
}; \
template<> struct llt_inplace<EIGTYPE, Upper> \
{ \
template<typename MatrixType> \
static typename MatrixType::Index blocked(MatrixType& m) \
{ \
return mkl_llt<EIGTYPE>::potrf(m, 'U'); \
} \
template<typename MatrixType, typename VectorType> \
static void rankUpdate(MatrixType& mat, const VectorType& vec) \
{ \
Transpose<MatrixType> matt(mat); \
return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate()); \
} \
};
EIGEN_MKL_LLT(double, double, d)
EIGEN_MKL_LLT(float, float, s)
EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z)
EIGEN_MKL_LLT(scomplex, MKL_Complex8, c)
}
#endif // EIGEN_LLT_MKL_H


@@ -0,0 +1,6 @@
FILE(GLOB Eigen_CholmodSupport_SRCS "*.h")
INSTALL(FILES
${Eigen_CholmodSupport_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel
)


@@ -69,11 +69,20 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
res.nzmax = mat.nonZeros();
res.nrow = mat.rows();
res.ncol = mat.cols();
res.p = mat._outerIndexPtr();
res.i = mat._innerIndexPtr();
res.x = mat._valuePtr();
res.p = mat.outerIndexPtr();
res.i = mat.innerIndexPtr();
res.x = mat.valuePtr();
res.sorted = 1;
res.packed = 1;
if(mat.isCompressed())
{
res.packed = 1;
}
else
{
res.packed = 0;
res.nz = mat.innerNonZeroPtr();
}
res.dtype = 0;
res.stype = -1;
@@ -149,7 +158,9 @@ enum CholmodMode {
CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt
};
/** \brief A Cholesky factorization and solver based on Cholmod
/** \ingroup CholmodSupport_Module
* \class CholmodDecomposition
* \brief A Cholesky factorization and solver based on Cholmod
*
* This class allows one to solve sparse linear problems A.X = B via an LL^T or LDL^T Cholesky factorization
* using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
@@ -159,6 +170,9 @@ enum CholmodMode {
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
* or Upper. Default is Lower.
*
* This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; in compressed or uncompressed mode (see isCompressed()).
*
* \sa \ref TutorialSparseDirectSolvers
*/
template<typename _MatrixType, int _UpLo = Lower>
class CholmodDecomposition

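A solve sketch for this wrapper (Cholmod must be installed and linked; the matrix is assumed selfadjoint positive definite):

#include <Eigen/CholmodSupport>
using namespace Eigen;
VectorXd solveWithCholmod(const SparseMatrix<double>& A, const VectorXd& b)
{
  CholmodDecomposition<SparseMatrix<double> > chol; // default _UpLo = Lower
  chol.compute(A);
  return chol.solve(b);
}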

@@ -159,7 +159,7 @@ template<typename Derived> class ArrayBase
/** \returns an \link MatrixBase Matrix \endlink expression of this array
* \sa MatrixBase::array() */
MatrixWrapper<Derived> matrix() { return derived(); }
const MatrixWrapper<Derived> matrix() const { return derived(); }
const MatrixWrapper<const Derived> matrix() const { return derived(); }
// template<typename Dest>
// inline void evalTo(Dest& dst) const { dst = matrix(); }
@@ -174,10 +174,10 @@ template<typename Derived> class ArrayBase
protected:
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
};
/** replaces \c *this by \c *this - \a other.

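The static assertions above turn an illegal mixed array/matrix expression into a readable compile error; the intended fix is an explicit conversion, as in this small sketch:

#include <Eigen/Dense>
using namespace Eigen;
int main()
{
  ArrayXd  a = ArrayXd::Ones(3);
  VectorXd v = VectorXd::Ones(3);
  // a += v;                     // would not compile: YOU_CANNOT_MIX_ARRAYS_AND_MATRICES
  a += v.array();                // fine: view v as an array
  VectorXd w = a.matrix() + v;   // fine: view a as a matrix
  return 0;
}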

@@ -61,7 +61,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
inline ArrayWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
inline Index rows() const { return m_expression.rows(); }
inline Index cols() const { return m_expression.cols(); }
@@ -71,7 +71,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
inline const Scalar* data() const { return m_expression.data(); }
inline const CoeffReturnType coeff(Index row, Index col) const
inline CoeffReturnType coeff(Index row, Index col) const
{
return m_expression.coeff(row, col);
}
@@ -86,7 +86,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(row, col);
}
inline const CoeffReturnType coeff(Index index) const
inline CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
@@ -128,8 +128,14 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
template<typename Dest>
inline void evalTo(Dest& dst) const { dst = m_expression; }
const typename internal::remove_all<NestedExpressionType>::type&
nestedExpression() const
{
return m_expression;
}
protected:
const NestedExpressionType m_expression;
NestedExpressionType m_expression;
};
/** \class MatrixWrapper
@@ -168,7 +174,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
inline MatrixWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
inline Index rows() const { return m_expression.rows(); }
inline Index cols() const { return m_expression.cols(); }
@@ -178,7 +184,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
inline const Scalar* data() const { return m_expression.data(); }
inline const CoeffReturnType coeff(Index row, Index col) const
inline CoeffReturnType coeff(Index row, Index col) const
{
return m_expression.coeff(row, col);
}
@@ -193,7 +199,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
return m_expression.derived().coeffRef(row, col);
}
inline const CoeffReturnType coeff(Index index) const
inline CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
@@ -232,8 +238,14 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
}
const typename internal::remove_all<NestedExpressionType>::type&
nestedExpression() const
{
return m_expression;
}
protected:
const NestedExpressionType m_expression;
NestedExpressionType m_expression;
};
#endif // EIGEN_ARRAYWRAPPER_H


@@ -152,7 +152,7 @@ struct assign_DefaultTraversal_CompleteUnrolling
inner = Index % Derived1::InnerSizeAtCompileTime
};
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeffByOuterInner(outer, inner, src);
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
@@ -162,13 +162,13 @@ struct assign_DefaultTraversal_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_DefaultTraversal_InnerUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
{
dst.copyCoeffByOuterInner(outer, Index, src);
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
@@ -178,7 +178,7 @@ struct assign_DefaultTraversal_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
};
/***********************
@@ -188,7 +188,7 @@ struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_LinearTraversal_CompleteUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeff(Index, src);
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
@@ -198,7 +198,7 @@ struct assign_LinearTraversal_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
/**************************
@@ -214,7 +214,7 @@ struct assign_innervec_CompleteUnrolling
JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
assign_innervec_CompleteUnrolling<Derived1, Derived2,
@@ -225,13 +225,13 @@ struct assign_innervec_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_innervec_InnerUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
assign_innervec_InnerUnrolling<Derived1, Derived2,
@@ -242,7 +242,7 @@ struct assign_innervec_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
};
/***************************************************************************
@@ -251,24 +251,25 @@ struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2,
int Traversal = assign_traits<Derived1, Derived2>::Traversal,
int Unrolling = assign_traits<Derived1, Derived2>::Unrolling>
int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
int Version = Specialized>
struct assign_impl;
/************************
*** Default traversal ***
************************/
template<typename Derived1, typename Derived2, int Unrolling>
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
template<typename Derived1, typename Derived2, int Unrolling, int Version>
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
{
inline static void run(Derived1 &, const Derived2 &) { }
static inline void run(Derived1 &, const Derived2 &) { }
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
@@ -278,21 +279,21 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
@@ -305,11 +306,11 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
*** Linear traversal ***
***********************/
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
for(Index i = 0; i < size; ++i)
@@ -317,10 +318,10 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
@@ -331,11 +332,11 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
*** Inner vectorization ***
**************************/
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
@@ -346,21 +347,21 @@ struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
@@ -398,11 +399,11 @@ struct unaligned_assign_impl<false>
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
@@ -412,7 +413,7 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: first_aligned(&dst.coeffRef(0), size);
: internal::first_aligned(&dst.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
@@ -426,11 +427,11 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
}
};
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
enum { size = Derived1::SizeAtCompileTime,
packetSize = packet_traits<typename Derived1::Scalar>::size,
@@ -445,11 +446,11 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnroll
*** Slice vectorization ***
***************************/
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
enum {
@@ -463,7 +464,7 @@ struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
const Index outerSize = dst.outerSize();
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
: first_aligned(&dst.coeffRef(0,0), innerSize);
: internal::first_aligned(&dst.coeffRef(0,0), innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
@@ -531,19 +532,19 @@ struct assign_selector;
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,false,false> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,true,false> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,false,true> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,true,true> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
};
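// The two boolean flags select, respectively, whether the right-hand side must be
// evaluated into a temporary first and whether it must be transposed (e.g. when
// assigning a row-vector expression to a column vector). This is our reading of
// the dispatch; see DenseBase::operator= for the actual call sites.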
} // end namespace internal

Eigen/src/Core/Assign_MKL.h (new file, 217 lines)

@@ -0,0 +1,217 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
********************************************************************************
*/
#ifndef EIGEN_ASSIGN_VML_H
#define EIGEN_ASSIGN_VML_H
namespace internal {
template<typename Op> struct vml_call
{ enum { IsSupported = 0 }; };
template<typename Dst, typename Src, typename UnaryOp>
class vml_assign_traits
{
private:
enum {
DstHasDirectAccess = Dst::Flags & DirectAccessBit,
SrcHasDirectAccess = Src::Flags & DirectAccessBit,
StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
: int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
: int(Dst::RowsAtCompileTime),
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
: int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
&& Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
MayEnableVml = MightEnableVml && LargeEnough,
MayLinearize = MayEnableVml && MightLinearize
};
public:
enum {
Traversal = MayLinearize ? LinearVectorizedTraversal
: MayEnableVml ? InnerVectorizedTraversal
: DefaultTraversal
};
};
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
struct vml_assign_impl
: assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
{
};
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
{
typedef typename Derived1::Scalar Scalar;
typedef typename Derived1::Index Index;
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
{
// in case we want to (or have to) skip VML at runtime we can call:
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer) {
const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
&(src.nestedExpression().coeffRef(0, outer));
Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
}
}
};
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
{
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
{
// in case we want to (or have to) skip VML at runtime we can call:
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
}
};
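// Usage sketch (hedged: assumes a build with MKL/VML support enabled, e.g. via
// EIGEN_USE_MKL_VML): a large enough coefficient-wise expression such as
//   Eigen::ArrayXd a = Eigen::ArrayXd::Random(4096);
//   Eigen::ArrayXd b = a.sin();
// is dispatched through vml_call< scalar_sin_op<double> > (i.e. vdSin/vmdSin)
// instead of the generic scalar loop.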
// Macros
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
template<typename Derived1, typename Derived2, typename UnaryOp> \
struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
} \
};
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define EIGEN_MKL_VML_MODE VML_HA
#else
#define EIGEN_MKL_VML_MODE VML_LA
#endif
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \
} \
};
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \
} \
};
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
EIGENTYPE exponent = func.m_exponent; \
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \
(VMLTYPE*)dst, &vmlMode); \
} \
};
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
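// For reference, EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(sin, vmsSin, float, float),
// as produced by EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) above, expands
// to roughly the following specialization:
//   template<> struct vml_call< scalar_sin_op<float> > {
//     enum { IsSupported = 1 };
//     static inline void run(const scalar_sin_op<float>&, int size, const float* src, float* dst) {
//       MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; // VML_HA, or VML_LA under EIGEN_FAST_MATH
//       vmsSin(size, src, dst, vmlMode);
//     }
//   };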
} // end namespace internal
#endif // EIGEN_ASSIGN_VML_H


@@ -94,7 +94,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess>
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * sizeof(Scalar)) % 16) == 0)) ? AlignedBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
@@ -242,6 +242,21 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
inline Index outerStride() const;
#endif
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
{
return m_xpr;
}
Index startRow() const
{
return m_startRow.value();
}
Index startCol() const
{
return m_startCol.value();
}
protected:
const typename XprType::Nested m_xpr;
@@ -304,6 +319,11 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
init();
}
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
{
return m_xpr;
}
/** \sa MapBase::innerStride() */
inline Index innerStride() const
{
@@ -341,7 +361,7 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
: m_xpr.innerStride();
}
const typename XprType::Nested m_xpr;
typename XprType::Nested m_xpr;
int m_outerStride;
};


@@ -35,7 +35,7 @@ struct all_unroller
row = (UnrollCount-1) % Derived::RowsAtCompileTime
};
inline static bool run(const Derived &mat)
static inline bool run(const Derived &mat)
{
return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
}
@@ -44,13 +44,13 @@ struct all_unroller
template<typename Derived>
struct all_unroller<Derived, 1>
{
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
};
template<typename Derived>
struct all_unroller<Derived, Dynamic>
{
inline static bool run(const Derived &) { return false; }
static inline bool run(const Derived &) { return false; }
};
template<typename Derived, int UnrollCount>
@@ -61,7 +61,7 @@ struct any_unroller
row = (UnrollCount-1) % Derived::RowsAtCompileTime
};
inline static bool run(const Derived &mat)
static inline bool run(const Derived &mat)
{
return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
}
@@ -70,13 +70,13 @@ struct any_unroller
template<typename Derived>
struct any_unroller<Derived, 1>
{
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
};
template<typename Derived>
struct any_unroller<Derived, Dynamic>
{
inline static bool run(const Derived &) { return false; }
static inline bool run(const Derived &) { return false; }
};
} // end namespace internal
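// Usage sketch: these unrollers back DenseBase::all() and DenseBase::any(); for
// fixed-size expressions the reduction is fully unrolled at compile time, e.g.
//   Eigen::Array3f v(1.f, 2.f, 3.f);
//   bool allPositive = (v > 0.f).all(); // && chain over the three coefficients
//   bool anyLarge    = (v > 2.f).any(); // || chain over the three coefficients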


@@ -167,8 +167,8 @@ class CwiseBinaryOp : internal::no_assignment_operator,
const BinaryOp& functor() const { return m_functor; }
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
LhsNested m_lhs;
RhsNested m_rhs;
const BinaryOp m_functor;
};


@@ -101,6 +101,9 @@ class CwiseNullaryOp : internal::no_assignment_operator,
return m_functor.packetOp(index);
}
/** \returns the functor representing the nullary operation */
const NullaryOp& functor() const { return m_functor; }
protected:
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;


@@ -95,7 +95,7 @@ class CwiseUnaryOp : internal::no_assignment_operator,
nestedExpression() { return m_xpr.const_cast_derived(); }
protected:
const typename XprType::Nested m_xpr;
typename XprType::Nested m_xpr;
const UnaryOp m_functor;
};


@@ -97,7 +97,7 @@ class CwiseUnaryView : internal::no_assignment_operator,
protected:
// FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
const typename internal::nested<MatrixType>::type m_matrix;
typename internal::nested<MatrixType>::type m_matrix;
ViewOp m_functor;
};


@@ -376,12 +376,13 @@ template<typename Derived> class DenseBase
inline Derived& operator*=(const Scalar& other);
inline Derived& operator/=(const Scalar& other);
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
/** \returns the matrix or vector obtained by evaluating this expression.
*
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
* a const reference, in order to avoid a useless copy.
*/
EIGEN_STRONG_INLINE const typename internal::eval<Derived>::type eval() const
EIGEN_STRONG_INLINE EvalReturnType eval() const
{
// Even though MSVC does not honor strong inlining when the return type
// is a dynamic matrix, we desperately need strong inlining for fixed
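// Illustration of the distinction the new EvalReturnType typedef encodes (sketch):
//   Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);
//   const Eigen::MatrixXd& t = (A + A).eval(); // expression: evaluates into a temporary
//   const Eigen::MatrixXd& r = A.eval();       // plain matrix: const reference, no copy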


@@ -710,16 +710,16 @@ namespace internal {
template<typename Derived, bool JustReturnZero>
struct first_aligned_impl
{
inline static typename Derived::Index run(const Derived&)
static inline typename Derived::Index run(const Derived&)
{ return 0; }
};
template<typename Derived>
struct first_aligned_impl<Derived, false>
{
inline static typename Derived::Index run(const Derived& m)
static inline typename Derived::Index run(const Derived& m)
{
return first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
}
};
@@ -729,7 +729,7 @@ struct first_aligned_impl<Derived, false>
* documentation.
*/
template<typename Derived>
inline static typename Derived::Index first_aligned(const Derived& m)
static inline typename Derived::Index first_aligned(const Derived& m)
{
return first_aligned_impl
<Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>


@@ -104,8 +104,8 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
: m_data(internal::constructor_without_unaligned_array_assert()) {}
inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
inline static DenseIndex rows(void) {return _Rows;}
inline static DenseIndex cols(void) {return _Cols;}
static inline DenseIndex rows(void) {return _Rows;}
static inline DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
inline const T *data() const { return m_data.array; }
@@ -120,14 +120,24 @@ template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0
inline DenseStorage(internal::constructor_without_unaligned_array_assert) {}
inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
inline void swap(DenseStorage& ) {}
inline static DenseIndex rows(void) {return _Rows;}
inline static DenseIndex cols(void) {return _Cols;}
static inline DenseIndex rows(void) {return _Rows;}
static inline DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
inline const T *data() const { return 0; }
inline T *data() { return 0; }
};
// more specializations for null matrices; these are necessary to resolve ambiguities
template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };
template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };
template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };
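// Illustrative example of a null matrix these specializations make unambiguous
// (hypothetical usage, assuming the usual sized constructor):
//   Eigen::Matrix<float, 0, Eigen::Dynamic> m(0, 4); // empty 0x4 matrix, data() == 0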
// dynamic-size matrix with fixed-size storage
template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
{
@@ -241,7 +251,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
inline static DenseIndex rows(void) {return _Rows;}
static inline DenseIndex rows(void) {return _Rows;}
inline DenseIndex cols(void) const {return m_cols;}
inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex cols)
{
@@ -278,7 +288,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
inline DenseIndex rows(void) const {return m_rows;}
inline static DenseIndex cols(void) {return _Cols;}
static inline DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);


@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -101,6 +102,15 @@ template<typename MatrixType, int DiagIndex> class Diagonal
return 0;
}
typedef typename internal::conditional<
internal::is_lvalue<MatrixType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
inline Scalar& coeffRef(Index row, Index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
@@ -133,8 +143,19 @@ template<typename MatrixType, int DiagIndex> class Diagonal
return m_matrix.coeff(index+rowOffset(), index+colOffset());
}
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
return m_matrix;
}
int index() const
{
return m_index.value();
}
protected:
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
const internal::variable_if_dynamic<Index, DiagIndex> m_index;
private:


@@ -72,7 +72,7 @@ class DiagonalBase : public EigenBase<Derived>
const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
operator*(const MatrixBase<MatrixDerived> &matrix) const;
inline const DiagonalWrapper<CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
inverse() const
{
return diagonal().cwiseInverse();
@@ -251,13 +251,13 @@ class DiagonalWrapper
#endif
/** Constructor from expression of diagonal coefficients to wrap. */
inline DiagonalWrapper(const DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}
inline DiagonalWrapper(DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}
/** \returns a const reference to the wrapped expression of diagonal coefficients. */
const DiagonalVectorType& diagonal() const { return m_diagonal; }
protected:
const typename DiagonalVectorType::Nested m_diagonal;
typename DiagonalVectorType::Nested m_diagonal;
};
/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients


@@ -107,8 +107,8 @@ class DiagonalProduct : internal::no_assignment_operator,
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
}
const typename MatrixType::Nested m_matrix;
const typename DiagonalType::Nested m_diagonal;
typename MatrixType::Nested m_matrix;
typename DiagonalType::Nested m_diagonal;
};
/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.


@@ -176,7 +176,7 @@ template<typename Derived, int p>
struct lpNorm_selector
{
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
inline static RealScalar run(const MatrixBase<Derived>& m)
static inline RealScalar run(const MatrixBase<Derived>& m)
{
return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
}
@@ -185,7 +185,7 @@ struct lpNorm_selector
template<typename Derived>
struct lpNorm_selector<Derived, 1>
{
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.cwiseAbs().sum();
}
@@ -194,7 +194,7 @@ struct lpNorm_selector<Derived, 1>
template<typename Derived>
struct lpNorm_selector<Derived, 2>
{
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.norm();
}
@@ -203,7 +203,7 @@ struct lpNorm_selector<Derived, 2>
template<typename Derived>
struct lpNorm_selector<Derived, Infinity>
{
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.cwiseAbs().maxCoeff();
}
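// Usage sketch of the selectors above:
//   Eigen::Vector3d v(1.0, -2.0, 3.0);
//   double l1   = v.lpNorm<1>();               // 6: sum of absolute values
//   double l2   = v.lpNorm<2>();               // same as v.norm()
//   double linf = v.lpNorm<Eigen::Infinity>(); // 3: largest absolute value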


@@ -220,6 +220,38 @@ struct functor_traits<scalar_quotient_op<Scalar> > {
};
};
/** \internal
* \brief Template functor to compute the and of two booleans
*
* \sa class CwiseBinaryOp, ArrayBase::operator&&
*/
struct scalar_boolean_and_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
};
template<> struct functor_traits<scalar_boolean_and_op> {
enum {
Cost = NumTraits<bool>::AddCost,
PacketAccess = false
};
};
/** \internal
* \brief Template functor to compute the or of two booleans
*
* \sa class CwiseBinaryOp, ArrayBase::operator||
*/
struct scalar_boolean_or_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
};
template<> struct functor_traits<scalar_boolean_or_op> {
enum {
Cost = NumTraits<bool>::AddCost,
PacketAccess = false
};
};
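// These functors implement the coefficient-wise boolean operators on arrays, e.g.
//   Eigen::Array3f a(1.f, 2.f, 3.f), b(0.f, 2.f, 4.f);
//   Eigen::Array<bool, 3, 1> both = (a > 1.f) && (b > 1.f); // scalar_boolean_and_op per coefficient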
// unary functors:
/** \internal


@@ -35,8 +35,8 @@ struct isApprox_selector
static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
{
using std::min;
const typename internal::nested<Derived,2>::type nested(x);
const typename internal::nested<OtherDerived,2>::type otherNested(y);
typename internal::nested<Derived,2>::type nested(x);
typename internal::nested<OtherDerived,2>::type otherNested(y);
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
}
};
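// In other words, x.isApprox(y, prec) checks |x - y|^2 <= prec^2 * min(|x|^2, |y|^2).
// For instance:
//   Eigen::Vector3d x(1, 2, 3);
//   Eigen::Vector3d y = x + Eigen::Vector3d::Constant(1e-14);
//   bool close = x.isApprox(y); // true with double's default dummy_precision (1e-12)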


@@ -0,0 +1,624 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_GENERAL_PRODUCT_H
#define EIGEN_GENERAL_PRODUCT_H
/** \class GeneralProduct
* \ingroup Core_Module
*
* \brief Expression of the product of two general matrices or vectors
*
* \param LhsNested the type used to store the left-hand side
* \param RhsNested the type used to store the right-hand side
* \param ProductMode the type of the product
*
* This class represents an expression of the product of two general matrices.
* We call a matrix "general" when it is a dense matrix with full storage; this
* excludes triangular, selfadjoint, and sparse matrices.
* It is the return type of the operator* between general matrices. Its template
* arguments are determined automatically by ProductReturnType. Therefore,
* GeneralProduct should never be used directly. To determine the result type of a
* function which involves a matrix product, use ProductReturnType::Type.
*
* \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
class GeneralProduct;
enum {
Large = 2,
Small = 3
};
namespace internal {
template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
{
enum { is_large = MaxSize == Dynamic ||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
value = is_large ? Large
: Size == 1 ? 1
: Small
};
};
template<typename Lhs, typename Rhs> struct product_type
{
typedef typename remove_all<Lhs>::type _Lhs;
typedef typename remove_all<Rhs>::type _Rhs;
enum {
MaxRows = _Lhs::MaxRowsAtCompileTime,
Rows = _Lhs::RowsAtCompileTime,
MaxCols = _Rhs::MaxColsAtCompileTime,
Cols = _Rhs::ColsAtCompileTime,
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
_Rhs::MaxRowsAtCompileTime),
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
_Rhs::RowsAtCompileTime),
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
};
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
// is to work around an internal compiler error with gcc 4.1 and 4.2.
private:
enum {
rows_select = product_size_category<Rows,MaxRows>::value,
cols_select = product_size_category<Cols,MaxCols>::value,
depth_select = product_size_category<Depth,MaxDepth>::value
};
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
public:
enum {
value = selector::ret
};
#ifdef EIGEN_DEBUG_PRODUCT
static void debug()
{
EIGEN_DEBUG_VAR(Rows);
EIGEN_DEBUG_VAR(Cols);
EIGEN_DEBUG_VAR(Depth);
EIGEN_DEBUG_VAR(rows_select);
EIGEN_DEBUG_VAR(cols_select);
EIGEN_DEBUG_VAR(depth_select);
EIGEN_DEBUG_VAR(value);
}
#endif
};
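// Debugging sketch: with EIGEN_DEBUG_PRODUCT defined, the chosen product kind
// for a given pair of operand types can be inspected, e.g.
//   internal::product_type<Eigen::MatrixXd, Eigen::VectorXd>::debug(); // prints the selected values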
/* The following allows selecting the kind of product at compile time
* based on the three dimensions of the product.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
// FIXME I'm not sure the current mapping is the ideal one.
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal
/** \class ProductReturnType
* \ingroup Core_Module
*
* \brief Helper class to get the correct and optimized returned type of operator*
*
* \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side
* \param ProductMode the type of the product (determined automatically by internal::product_mode)
*
* This class defines the typename Type representing the optimized product expression
* between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
* is the recommended way to define the result type of a function returning an expression
* which involve a matrix product. The class Product should never be
* used directly.
*
* \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType>
struct ProductReturnType
{
// TODO use the nested type to reduce instantiations ????
// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
};
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
{
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
};
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
};
// this is a workaround for sun CC
template<typename Lhs, typename Rhs>
struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{};
/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/
// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Con: this could be a problem if, in a meta-unrolled algorithm, a matrix-matrix
// product ends up as a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
namespace internal {
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
: traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
{};
}
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, InnerProduct>
: internal::no_assignment_operator,
public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
{
typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
public:
GeneralProduct(const Lhs& lhs, const Rhs& rhs)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
}
/** Conversion to scalar */
operator const typename Base::Scalar() const {
return Base::coeff(0,0);
}
};
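// Thanks to the conversion operator, an inner product can be used directly where
// a scalar is expected:
//   Eigen::RowVector3d u(1, 2, 3);
//   Eigen::Vector3d    v(4, 5, 6);
//   double d = u * v; // 1x1 GeneralProduct, implicitly converted to 32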
/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/
namespace internal {
template<int StorageOrder> struct outer_product_selector;
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
{};
}
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
{
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{
internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
}
};
namespace internal {
template<> struct outer_product_selector<ColMajor> {
template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index;
// FIXME make sure lhs is sequentially stored
// FIXME not very good if rhs is real and lhs complex while alpha is real too
const Index cols = dest.cols();
for (Index j=0; j<cols; ++j)
dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
}
};
template<> struct outer_product_selector<RowMajor> {
template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index;
// FIXME make sure rhs is sequentially stored
// FIXME not very good if lhs is real and rhs complex while alpha is real too
const Index rows = dest.rows();
for (Index i=0; i<rows; ++i)
dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
}
};
} // end namespace internal
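// Usage sketch of the outer-product path:
//   Eigen::Vector3d    a(1, 2, 3);
//   Eigen::RowVector3d b(4, 5, 6);
//   Eigen::Matrix3d M = a * b; // rank-1 result, filled column by column when M is col-major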
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
/* According to the shape/flags of the matrix we have to distinguish 3 different cases:
* 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
* 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
* 3 - all other cases are handled using a simple loop along the outer-storage direction.
* Therefore we need a lower level meta selector.
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
*/
namespace internal {
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
{};
template<int Side, int StorageOrder, bool BlasCompatible>
struct gemv_selector;
} // end namespace internal
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemvProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
{
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
}
};
namespace internal {
// The vector is on the left => transposition
template<int StorageOrder, bool BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
(prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
}
};
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};
template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
EIGEN_STRONG_INLINE Scalar* data() { return 0; }
};
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
#if EIGEN_ALIGN_STATICALLY
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and returning the address of the first aligned element.
enum {
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
PacketSize = internal::packet_traits<Scalar>::size
};
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
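// Note: the array is over-allocated by one extra packet so that rounding the base
// address up to the next 16-byte boundary (in data() below) still lands inside m_data.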
EIGEN_STRONG_INLINE Scalar* data() {
return ForceAlignment
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
: m_data.array;
}
#endif
};
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
template<typename ProductType, typename Dest>
static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::RealScalar RealScalar;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// On the other hand, it is good for the cache to pack the vector anyway...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
};
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
evalToDest ? dest.data() : static_dest.data());
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
{
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
MappedDest(actualDestPtr, dest.size()) = dest;
}
general_matrix_vector_product
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
actualLhs.data(), actualLhs.outerStride(),
actualRhs.data(), actualRhs.innerStride(),
actualDestPtr, 1,
compatibleAlpha);
if (!evalToDest)
{
if(!alphaIsCompatible)
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
}
}
};
template<> struct gemv_selector<OnTheRight,RowMajor,true>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::Index Index;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::_ActualRhsType _ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// On the other hand, it is good for the cache to pack the vector anyway...
DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
};
gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
if(!DirectlyUseRhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
general_matrix_vector_product
<Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
actualLhs.data(), actualLhs.outerStride(),
actualRhsPtr, 1,
dest.data(), dest.innerStride(),
actualAlpha);
}
};
template<> struct gemv_selector<OnTheRight,ColMajor,false>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename Dest::Index Index;
// TODO make sure dest is sequentially stored in memory, otherwise use a temp
const Index size = prod.rhs().rows();
for(Index k=0; k<size; ++k)
dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
}
};
template<> struct gemv_selector<OnTheRight,RowMajor,false>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename Dest::Index Index;
// TODO make sure rhs is sequentially stored in memory, otherwise use a temp
const Index rows = prod.rows();
for(Index i=0; i<rows; ++i)
dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
}
};
} // end namespace internal
/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/
/** \returns the matrix product of \c *this and \a other.
*
* \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
*
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
*/
template<typename Derived>
template<typename OtherDerived>
inline const typename ProductReturnType<Derived, OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
// A note regarding the function declaration: In MSVC, this function will sometimes
// not be inlined since DenseStorage is an unwindable object for dynamic
// matrices and product types are holding a member to store the result.
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
enum {
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|| OtherDerived::RowsAtCompileTime==Dynamic
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
};
// note to the lost user:
// * for a dot product use: v1.dot(v2)
// * for a coeff-wise product use: v1.cwiseProduct(v2)
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
#ifdef EIGEN_DEBUG_PRODUCT
internal::product_type<Derived,OtherDerived>::debug();
#endif
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
*
* The returned product will behave like any other expression: the coefficients of the product will be
* computed one at a time, as requested. This might be useful in some extremely rare cases when only
* a small, non-coherent fraction of the result's coefficients has to be computed.
*
* \warning This version of the matrix product can be much, much slower. Use it only if you know
* what you are doing and have measured a true speed improvement.
*
* \sa operator*(const MatrixBase&)
*/
template<typename Derived>
template<typename OtherDerived>
const typename LazyProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
enum {
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|| OtherDerived::RowsAtCompileTime==Dynamic
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
};
// note to the lost user:
// * for a dot product use: v1.dot(v2)
// * for a coeff-wise product use: v1.cwiseProduct(v2)
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
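// Usage sketch: handy when only a few coefficients of the product are needed:
//   Eigen::Matrix3d A = Eigen::Matrix3d::Random(), B = Eigen::Matrix3d::Random();
//   double c01 = A.lazyProduct(B).coeff(0, 1); // one dot product, no full 3x3 temporary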
#endif // EIGEN_PRODUCT_H


@@ -312,7 +312,7 @@ template<int Offset,typename PacketType>
struct palign_impl
{
// by default data are aligned, so there is nothing to be done :)
inline static void run(PacketType&, const PacketType&) {}
static inline void run(PacketType&, const PacketType&) {}
};
/** \internal update \a first using the concatenation of the \a Offset last elements


@@ -171,7 +171,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
return s;
}
const typename Derived::Nested m = _m;
typename Derived::Nested m = _m;
typedef typename Derived::Scalar Scalar;
typedef typename Derived::Index Index;


@@ -309,8 +309,7 @@ struct abs2_impl<std::complex<RealScalar> >
{
static inline RealScalar run(const std::complex<RealScalar>& x)
{
using std::norm;
return norm(x);
return real(x)*real(x) + imag(x)*imag(x);
}
};
@@ -553,7 +552,7 @@ struct pow_default_impl<Scalar, true>
{
static inline Scalar run(Scalar x, Scalar y)
{
Scalar res = 1;
Scalar res(1);
eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
if(y & 1) res *= x;
y >>= 1;
@@ -838,6 +837,17 @@ template<> struct scalar_fuzzy_impl<bool>
};
/****************************************************************************
* Special functions *
****************************************************************************/
// std::isfinite is non-standard, so let's define our own version,
// even though it is not very efficient.
template<typename T> bool isfinite(const T& x)
{
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
}
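// Behavioral sketch: comparisons involving NaN are false, so both NaN and the
// infinities are rejected:
//   isfinite(0.5)  -> true
//   isfinite(+inf) -> false (x < highest() fails)
//   isfinite(NaN)  -> false (both comparisons fail)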
} // end namespace internal
#endif // EIGEN_MATHFUNCTIONS_H


@@ -411,25 +411,6 @@ EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
#undef EIGEN_MAKE_TYPEDEFS
#undef EIGEN_MAKE_TYPEDEFS_LARGE
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
using Eigen::Vector##SizeSuffix##TypeSuffix; \
using Eigen::RowVector##SizeSuffix##TypeSuffix;
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
#define EIGEN_USING_MATRIX_TYPEDEFS \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
#undef EIGEN_MAKE_FIXED_TYPEDEFS
#endif // EIGEN_MATRIX_H


@@ -250,8 +250,7 @@ template<typename Derived> class MatrixBase
// huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead
// of an integer constant. Solution: overload the part() method template wrt template parameters list.
// Note: replacing next line by "template<template<typename T, int n> class U>" produces a mysterious error C2082 in MSVC.
template<template<typename, int> class U>
template<template<typename T, int n> class U>
const DiagonalWrapper<ConstDiagonalReturnType> part() const
{ return diagonal().asDiagonal(); }
#endif // EIGEN2_SUPPORT
@@ -331,7 +330,7 @@ template<typename Derived> class MatrixBase
/** \returns an \link ArrayBase Array \endlink expression of this matrix
* \sa ArrayBase::matrix() */
ArrayWrapper<Derived> array() { return derived(); }
const ArrayWrapper<Derived> array() const { return derived(); }
const ArrayWrapper<const Derived> array() const { return derived(); }
/////////// LU module ///////////
@@ -466,6 +465,8 @@ template<typename Derived> class MatrixBase
const MatrixFunctionReturnValue<Derived> sinh() const;
const MatrixFunctionReturnValue<Derived> cos() const;
const MatrixFunctionReturnValue<Derived> sin() const;
const MatrixSquareRootReturnValue<Derived> sqrt() const;
const MatrixLogarithmReturnValue<Derived> log() const;
#ifdef EIGEN2_SUPPORT
template<typename ProductDerived, typename Lhs, typename Rhs>
@@ -512,10 +513,10 @@ template<typename Derived> class MatrixBase
protected:
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
};
#endif // EIGEN_MATRIXBASE_H
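The std::ptrdiff_t cast in the two hunks above silences the signed/unsigned warning from comparing sizeof(...) with -1; the comparison stays dependent on OtherDerived, so it fails only when the forbidden overload is actually instantiated. A sketch of the same dependent-assert idiom, using C++11 static_assert in place of EIGEN_STATIC_ASSERT:

// The condition can never hold, but since it depends on OtherDerived
// the compiler rejects it only upon instantiation.
template<typename T> struct always_false { enum { value = 0 }; };

template<typename OtherDerived>
void forbidden_mix(const OtherDerived&)
{
  static_assert(always_false<OtherDerived>::value,
                "YOU_CANNOT_MIX_ARRAYS_AND_MATRICES");
}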


@@ -81,14 +81,14 @@ template<typename T> struct GenericNumTraits
>::type NonInteger;
typedef T Nested;
inline static Real epsilon() { return std::numeric_limits<T>::epsilon(); }
inline static Real dummy_precision()
static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
static inline Real dummy_precision()
{
// make sure to override this for floating-point types
return Real(0);
}
inline static T highest() { return (std::numeric_limits<T>::max)(); }
inline static T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
static inline T highest() { return (std::numeric_limits<T>::max)(); }
static inline T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
#ifdef EIGEN2_SUPPORT
enum {
@@ -104,12 +104,12 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
template<> struct NumTraits<float>
: GenericNumTraits<float>
{
inline static float dummy_precision() { return 1e-5f; }
static inline float dummy_precision() { return 1e-5f; }
};
template<> struct NumTraits<double> : GenericNumTraits<double>
{
inline static double dummy_precision() { return 1e-12; }
static inline double dummy_precision() { return 1e-12; }
};
template<> struct NumTraits<long double>
@@ -130,8 +130,8 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
};
inline static Real epsilon() { return NumTraits<Real>::epsilon(); }
inline static Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
};
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
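dummy_precision() supplies the default fuzz for approximate comparisons, which is why the floating-point specializations override it. A reduced sketch (not Eigen's actual isApprox) of how such a per-type tolerance is typically consumed:

#include <algorithm>
#include <cmath>

// Hypothetical traits, mirroring the dummy_precision() overrides above.
template<typename T> struct num_traits_sketch;
template<> struct num_traits_sketch<float>
{ static inline float dummy_precision() { return 1e-5f; } };
template<> struct num_traits_sketch<double>
{ static inline double dummy_precision() { return 1e-12; } };

template<typename T>
bool is_approx(T a, T b, T prec = num_traits_sketch<T>::dummy_precision())
{
  return std::abs(a - b) <= prec * (std::min)(std::abs(a), std::abs(b));
}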


@@ -511,7 +511,7 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
protected:
const typename IndicesType::Nested m_indices;
typename IndicesType::Nested m_indices;
};
/** \returns the matrix with the permutation applied to the columns.
@@ -608,7 +608,7 @@ struct permut_matrix_product_retval
protected:
const PermutationType& m_permutation;
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
};
/* Template partial specialization for transposed/inverse permutations */


@@ -53,7 +53,7 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
} // end namespace internal
/**
/** \class PlainObjectBase
* \brief %Dense storage base class for matrices and arrays.
*
* This class can be extended with the help of the plugin mechanism described on the page
@@ -61,8 +61,29 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
*
* \sa \ref TopicClassHierarchy
*/
#ifdef EIGEN_PARSED_BY_DOXYGEN
namespace internal {
// this is a workaround for doxygen not being able to understand the inheritance logic
// when it is hidden by the dense_xpr_base helper struct.
template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
/** This class is just a workaround for Doxygen and it does not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct dense_xpr_base_dispatcher_for_doxygen<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
/** This class is just a workaround for Doxygen and it does not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct dense_xpr_base_dispatcher_for_doxygen<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
} // namespace internal
template<typename Derived>
class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen<Derived>
#else
template<typename Derived>
class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
#endif
{
public:
enum { Options = internal::traits<Derived>::Options };
@@ -443,68 +464,68 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* \see class Map
*/
//@{
inline static ConstMapType Map(const Scalar* data)
static inline ConstMapType Map(const Scalar* data)
{ return ConstMapType(data); }
inline static MapType Map(Scalar* data)
static inline MapType Map(Scalar* data)
{ return MapType(data); }
inline static ConstMapType Map(const Scalar* data, Index size)
static inline ConstMapType Map(const Scalar* data, Index size)
{ return ConstMapType(data, size); }
inline static MapType Map(Scalar* data, Index size)
static inline MapType Map(Scalar* data, Index size)
{ return MapType(data, size); }
inline static ConstMapType Map(const Scalar* data, Index rows, Index cols)
static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)
{ return ConstMapType(data, rows, cols); }
inline static MapType Map(Scalar* data, Index rows, Index cols)
static inline MapType Map(Scalar* data, Index rows, Index cols)
{ return MapType(data, rows, cols); }
inline static ConstAlignedMapType MapAligned(const Scalar* data)
static inline ConstAlignedMapType MapAligned(const Scalar* data)
{ return ConstAlignedMapType(data); }
inline static AlignedMapType MapAligned(Scalar* data)
static inline AlignedMapType MapAligned(Scalar* data)
{ return AlignedMapType(data); }
inline static ConstAlignedMapType MapAligned(const Scalar* data, Index size)
static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)
{ return ConstAlignedMapType(data, size); }
inline static AlignedMapType MapAligned(Scalar* data, Index size)
static inline AlignedMapType MapAligned(Scalar* data, Index size)
{ return AlignedMapType(data, size); }
inline static ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
{ return ConstAlignedMapType(data, rows, cols); }
inline static AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
{ return AlignedMapType(data, rows, cols); }
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
//@}
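All of the Map()/MapAligned() factories above wrap an existing buffer without copying; the inline/static reordering does not change how they are used. A usage sketch:

#include <Eigen/Dense>

// Map() views an existing raw buffer as a matrix; no copy is made.
int main()
{
  float data[6] = {1, 2, 3, 4, 5, 6};
  Eigen::Map<Eigen::MatrixXf> m = Eigen::MatrixXf::Map(data, 2, 3);
  m(0, 0) = 10.f;       // writes straight through to data[0]
  return int(data[0]);  // 10
}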
@@ -594,6 +615,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<typename T0, typename T1>
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
{
EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
bool(NumTraits<T1>::IsInteger),
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
internal::check_rows_cols_for_overflow(rows, cols);
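The static assertion added to _init2 rejects floating-point arguments to the two-argument constructor of dynamically-sized types, where the arguments mean (rows, cols) rather than coefficients. A sketch of what is and is not accepted:

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd a(2, 3);       // two integers: (rows, cols)
  Eigen::Vector2d b(1.0, 2.0);   // fixed size 2: the two coefficients
  // Eigen::MatrixXd c(2.0, 3.0); // now rejected at compile time by the
  //                              // FLOATING_POINT_ARGUMENT... assert
  return int(a.size() + b.size());
}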
@@ -623,7 +647,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_STRONG_INLINE static void _check_template_params()
static EIGEN_STRONG_INLINE void _check_template_params()
{
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)


@@ -1,8 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -26,600 +25,89 @@
#ifndef EIGEN_PRODUCT_H
#define EIGEN_PRODUCT_H
/** \class GeneralProduct
template<typename Lhs, typename Rhs> class Product;
template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
/** \class Product
* \ingroup Core_Module
*
* \brief Expression of the product of two general matrices or vectors
* \brief Expression of the product of two arbitrary matrices or vectors
*
* \param LhsNested the type used to store the left-hand side
* \param RhsNested the type used to store the right-hand side
* \param ProductMode the type of the product
* \param Lhs the type of the left-hand side expression
* \param Rhs the type of the right-hand side expression
*
* This class represents an expression of the product of two general matrices.
* We call a general matrix a dense matrix with full storage; for instance,
* this excludes triangular, selfadjoint, and sparse matrices.
* It is the return type of the operator* between general matrices. Its template
* arguments are determined automatically by ProductReturnType. Therefore,
* GeneralProduct should never be used directly. To determine the result type of a
* function which involves a matrix product, use ProductReturnType::Type.
* This class represents an expression of the product of two arbitrary matrices.
*
* \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
class GeneralProduct;
enum {
Large = 2,
Small = 3
};
namespace internal {
template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
template<typename Lhs, typename Rhs>
struct traits<Product<Lhs, Rhs> >
{
enum { is_large = MaxSize == Dynamic ||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
value = is_large ? Large
: Size == 1 ? 1
: Small
typedef MatrixXpr XprKind;
typedef typename remove_all<Lhs>::type LhsCleaned;
typedef typename remove_all<Rhs>::type RhsCleaned;
typedef typename scalar_product_traits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind,
typename traits<RhsCleaned>::StorageKind>::ret StorageKind;
typedef typename promote_index_type<typename traits<LhsCleaned>::Index,
typename traits<RhsCleaned>::Index>::type Index;
enum {
RowsAtCompileTime = LhsCleaned::RowsAtCompileTime,
ColsAtCompileTime = RhsCleaned::ColsAtCompileTime,
MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime,
MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime,
Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0), // TODO should be no storage order
CoeffReadCost = 0 // TODO CoeffReadCost should not be part of the expression traits
};
};
template<typename Lhs, typename Rhs> struct product_type
{
typedef typename remove_all<Lhs>::type _Lhs;
typedef typename remove_all<Rhs>::type _Rhs;
enum {
MaxRows = _Lhs::MaxRowsAtCompileTime,
Rows = _Lhs::RowsAtCompileTime,
MaxCols = _Rhs::MaxColsAtCompileTime,
Cols = _Rhs::ColsAtCompileTime,
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
_Rhs::MaxRowsAtCompileTime),
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
_Rhs::RowsAtCompileTime),
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
};
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
// is to work around an internal compiler error with gcc 4.1 and 4.2.
private:
enum {
rows_select = product_size_category<Rows,MaxRows>::value,
cols_select = product_size_category<Cols,MaxCols>::value,
depth_select = product_size_category<Depth,MaxDepth>::value
};
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
public:
enum {
value = selector::ret
};
#ifdef EIGEN_DEBUG_PRODUCT
static void debug()
{
EIGEN_DEBUG_VAR(Rows);
EIGEN_DEBUG_VAR(Cols);
EIGEN_DEBUG_VAR(Depth);
EIGEN_DEBUG_VAR(rows_select);
EIGEN_DEBUG_VAR(cols_select);
EIGEN_DEBUG_VAR(depth_select);
EIGEN_DEBUG_VAR(value);
}
#endif
};
/* The following allows selecting the kind of product at compile time
* based on the three dimensions of the product.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
// FIXME I'm not sure the current mapping is the ideal one.
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal
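The specializations above form a compile-time lookup from the (rows, cols, depth) classification {1, Small, Large} to a product kernel; the full specialization of <1,1,1> exists to disambiguate the overlapping partial specializations. A reduced sketch of the dispatch idiom, with hypothetical kernel tags:

// Hypothetical tags and size classes, standing in for Eigen's.
enum { InnerKernel, OuterKernel, CoeffKernel, GemmKernel };
enum { One = 1, LargeDim = 2, SmallDim = 3 };

template<int Rows, int Cols, int Depth> struct selector_sketch
{ enum { ret = GemmKernel }; };                        // default: blocked GEMM
template<int Depth> struct selector_sketch<One, One, Depth>
{ enum { ret = InnerKernel }; };                       // row-vector * col-vector
template<int Rows, int Cols> struct selector_sketch<Rows, Cols, One>
{ enum { ret = OuterKernel }; };                       // col-vector * row-vector
template<> struct selector_sketch<One, One, One>
{ enum { ret = InnerKernel }; };                       // disambiguates the two above
template<> struct selector_sketch<SmallDim, SmallDim, SmallDim>
{ enum { ret = CoeffKernel }; };                       // tiny: coefficient-based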
/** \class ProductReturnType
* \ingroup Core_Module
*
* \brief Helper class to get the correct and optimized returned type of operator*
*
* \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side
* \param ProductMode the type of the product (determined automatically by internal::product_mode)
*
* This class defines the typename Type representing the optimized product expression
* between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
* is the recommended way to define the result type of a function returning an expression
* which involve a matrix product. The class Product should never be
* used directly.
*
* \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType>
struct ProductReturnType
{
// TODO use the nested type to reduce instantiations ????
// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
};
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
{
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
};
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
};
// this is a workaround for sun CC
template<typename Lhs, typename Rhs>
struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{};
/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/
// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
// product ends up as a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
namespace internal {
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
: traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
{};
}
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, InnerProduct>
: internal::no_assignment_operator,
public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
{
typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
public:
GeneralProduct(const Lhs& lhs, const Rhs& rhs)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
}
/** Conversion to scalar */
operator const typename Base::Scalar() const {
return Base::coeff(0,0);
}
};
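Thanks to the conversion operator above, the 1x1 inner-product expression decays to its scalar value on assignment. A usage sketch:

#include <Eigen/Dense>

int main()
{
  Eigen::Vector3d u(1, 2, 3), v(4, 5, 6);
  double d = u.transpose() * v;  // 1x1 product expression decays to 32.0
  double e = u.dot(v);           // the explicit spelling of the same value
  return int(d - e);             // 0
}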
/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/
namespace internal {
template<int StorageOrder> struct outer_product_selector;
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
{};
}
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
typename internal::traits<Rhs>::StorageKind>::ret>
{
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{
internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
}
};
namespace internal {
template<> struct outer_product_selector<ColMajor> {
template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index;
// FIXME make sure lhs is sequentially stored
// FIXME not very good if rhs is real and lhs complex while alpha is real too
const Index cols = dest.cols();
for (Index j=0; j<cols; ++j)
dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
}
};
template<> struct outer_product_selector<RowMajor> {
template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index;
// FIXME make sure rhs is sequentially stored
// FIXME not very good if lhs is real and rhs complex while alpha is real too
const Index rows = dest.rows();
for (Index i=0; i<rows; ++i)
dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
}
};
} // end namespace internal
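The two selectors traverse the destination along its storage order: a column-major destination is updated one column at a time, a row-major one row by row. A sketch of the column-major rank-1 update on concrete types:

#include <Eigen/Dense>

// Sketch of the column-major branch: dest += alpha * lhs * rhs^T,
// written one column at a time so dest is touched in storage order.
void rank1_update(Eigen::MatrixXd& dest, double alpha,
                  const Eigen::VectorXd& lhs, const Eigen::VectorXd& rhs)
{
  for(Eigen::MatrixXd::Index j = 0; j < dest.cols(); ++j)
    dest.col(j) += (alpha * rhs(j)) * lhs;
}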
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
/* According to the shape/flags of the matrix we have to distinguish 3 different cases:
* 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
* 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
* 3 - all other cases are handled using a simple loop along the outer-storage direction.
* Therefore we need a lower level meta selector.
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
*/
namespace internal {
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
{};
template<int Side, int StorageOrder, bool BlasCompatible>
struct gemv_selector;
} // end namespace internal
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemvProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
{
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
}
};
namespace internal {
// The vector is on the left => transposition
template<int StorageOrder, bool BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
(prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
}
};
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};
template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
EIGEN_STRONG_INLINE Scalar* data() { return 0; }
};
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
#if EIGEN_ALIGN_STATICALLY
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and returning the address of the first aligned element.
enum {
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
PacketSize = internal::packet_traits<Scalar>::size
};
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
return ForceAlignment
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
: m_data.array;
}
#endif
};
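In the fallback branch above, alignment is enforced by hand: the buffer is over-allocated by one packet and the returned pointer is rounded up to the next 16-byte boundary (the & ~15 followed by +16 always advances, which the extra packet makes safe). A standalone sketch of the same pointer arithmetic:

#include <cstddef>

// Reserve PacketSize extra scalars, then round the address up to the
// next 16-byte boundary.
template<typename Scalar, int Size, int PacketSize>
struct manually_aligned_buffer
{
  Scalar m_data[Size + PacketSize];
  Scalar* data()
  {
    return reinterpret_cast<Scalar*>(
        (reinterpret_cast<std::size_t>(m_data) & ~std::size_t(15)) + 16);
  }
};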
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
template<typename ProductType, typename Dest>
static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::RealScalar RealScalar;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on the other hand, it is good for the cache to pack the vector anyway...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
};
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
// this is written like this (i.e., with a ?:) to work around an ICE with ICC 12
bool alphaIsCompatible = (!ComplexByReal) ? true : (imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
typedef typename ProductImpl<
Lhs, Rhs,
typename internal::promote_storage_type<typename Lhs::StorageKind,
typename Rhs::StorageKind>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
evalToDest ? dest.data() : static_dest.data());
if(!evalToDest)
typedef typename Lhs::Nested LhsNested;
typedef typename Rhs::Nested RhsNested;
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
{
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
MappedDest(actualDestPtr, dest.size()) = dest;
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
}
general_matrix_vector_product
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
&actualLhs.coeffRef(0,0), actualLhs.outerStride(),
actualRhs.data(), actualRhs.innerStride(),
actualDestPtr, 1,
compatibleAlpha);
inline Index rows() const { return m_lhs.rows(); }
inline Index cols() const { return m_rhs.cols(); }
if (!evalToDest)
{
if(!alphaIsCompatible)
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
}
}
const LhsNestedCleaned& lhs() const { return m_lhs; }
const RhsNestedCleaned& rhs() const { return m_rhs; }
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
};
template<> struct gemv_selector<OnTheRight,RowMajor,true>
template<typename Lhs, typename Rhs>
class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::Index Index;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::_ActualRhsType _ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Product<Lhs, Rhs> Derived;
public:
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on the other hand, it is good for the cache to pack the vector anyway...
DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
};
gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
if(!DirectlyUseRhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
general_matrix_vector_product
<Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
&actualLhs.coeffRef(0,0), actualLhs.outerStride(),
actualRhsPtr, 1,
&dest.coeffRef(0,0), dest.innerStride(),
actualAlpha);
}
typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
};
template<> struct gemv_selector<OnTheRight,ColMajor,false>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename Dest::Index Index;
// TODO make sure dest is sequentially stored in memory, otherwise use a temp
const Index size = prod.rhs().rows();
for(Index k=0; k<size; ++k)
dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
}
};
template<> struct gemv_selector<OnTheRight,RowMajor,false>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename Dest::Index Index;
// TODO make sure rhs is sequentially stored in memory, otherwise use a temp
const Index rows = prod.rows();
for(Index i=0; i<rows; ++i)
dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
}
};
} // end namespace internal
/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/
/** \returns the matrix product of \c *this and \a other.
*
* \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
*
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
*/
template<typename Derived>
template<typename OtherDerived>
inline const typename ProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
// A note regarding the function declaration: In MSVC, this function will sometimes
// not be inlined since DenseStorage is an unwindable object for dynamic
// matrices and product types are holding a member to store the result.
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
enum {
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|| OtherDerived::RowsAtCompileTime==Dynamic
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
};
// note to the lost user:
// * for a dot product use: v1.dot(v2)
// * for a coeff-wise product use: v1.cwiseProduct(v2)
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
#ifdef EIGEN_DEBUG_PRODUCT
internal::product_type<Derived,OtherDerived>::debug();
#endif
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
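The static asserts above turn an accidental vector*vector product into an explicit compile error pointing at dot() and cwiseProduct(). A sketch of the valid spellings:

#include <Eigen/Dense>

int main()
{
  Eigen::Vector3d v1(1, 2, 3), v2(4, 5, 6);
  // v1 * v2 would trip INVALID_VECTOR_VECTOR_PRODUCT...; instead:
  double dot = v1.dot(v2);                      // scalar product: 32
  Eigen::Vector3d had = v1.cwiseProduct(v2);    // coefficient-wise product
  Eigen::Matrix3d outer = v1 * v2.transpose();  // valid 3x1 * 1x3 product
  return int(dot + had(0) + outer(0, 0));
}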
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
*
* The returned product will behave like any other expression: the coefficients of the product will be
* computed one at a time, as requested. This might be useful in some extremely rare cases when only
* a small and non-contiguous fraction of the result's coefficients has to be computed.
*
* \warning This version of the matrix product can be much, much slower. So use it only if you know
* what you are doing and have measured a true speed improvement.
*
* \sa operator*(const MatrixBase&)
*/
template<typename Derived>
template<typename OtherDerived>
const typename LazyProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
enum {
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|| OtherDerived::RowsAtCompileTime==Dynamic
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
};
// note to the lost user:
// * for a dot product use: v1.dot(v2)
// * for a coeff-wise product use: v1.cwiseProduct(v2)
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
#endif // EIGEN_PRODUCT_H
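lazyProduct() returns an unevaluated expression whose coefficients are computed on demand, which pays off only when a tiny fraction of the result is read. A usage sketch:

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(100, 100);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(100, 100);
  // Only the requested coefficient is evaluated: one 100-element dot
  // product instead of the full 100x100 matrix product.
  double c = A.lazyProduct(B)(3, 7);
  return int(c);
}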


@@ -115,10 +115,10 @@ class ProductBase : public MatrixBase<Derived>
inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); }
template<typename Dest>
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,1); }
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); }
template<typename Dest>
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,-1); }
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
@@ -179,8 +179,8 @@ class ProductBase : public MatrixBase<Derived>
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
LhsNested m_lhs;
RhsNested m_rhs;
mutable PlainObject m_result;
};


@@ -95,7 +95,7 @@ struct redux_novec_unroller
typedef typename Derived::Scalar Scalar;
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func)
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
{
return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
@@ -112,7 +112,7 @@ struct redux_novec_unroller<Func, Derived, Start, 1>
typedef typename Derived::Scalar Scalar;
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&)
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
{
return mat.coeffByOuterInner(outer, inner);
}
@@ -125,7 +125,7 @@ template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
typedef typename Derived::Scalar Scalar;
EIGEN_STRONG_INLINE static Scalar run(const Derived&, const Func&) { return Scalar(); }
static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
};
/*** vectorization ***/
@@ -141,7 +141,7 @@ struct redux_vec_unroller
typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func)
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
{
return func.packetOp(
redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
@@ -162,7 +162,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
{
return mat.template packetByOuterInner<alignment>(outer, inner);
}
@@ -214,20 +214,33 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
const Index size = mat.size();
eigen_assert(size && "you are using an empty matrix");
const Index packetSize = packet_traits<Scalar>::size;
const Index alignedStart = first_aligned(mat);
const Index alignedStart = internal::first_aligned(mat);
enum {
alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const Index alignedSize = ((size-alignedStart)/packetSize)*packetSize;
const Index alignedEnd = alignedStart + alignedSize;
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
const Index alignedEnd2 = alignedStart + alignedSize2;
const Index alignedEnd = alignedStart + alignedSize;
Scalar res;
if(alignedSize)
{
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
for(Index index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
packet_res = func.packetOp(packet_res, mat.template packet<alignment>(index));
res = func.predux(packet_res);
PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
{
PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
{
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
}
packet_res0 = func.packetOp(packet_res0,packet_res1);
if(alignedEnd>alignedEnd2)
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
}
res = func.predux(packet_res0);
for(Index index = 0; index < alignedStart; ++index)
res = func(res,mat.coeff(index));
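The rewritten reduction keeps two independent packet accumulators so that consecutive packetOp calls no longer form one long dependency chain. A scalar sketch of the same two-accumulator pattern:

// The partial sums are independent, so the additions can overlap in
// the pipeline instead of serializing on a single accumulator.
float sum_unrolled2(const float* x, int n) // assumes n is even
{
  float acc0 = 0.f, acc1 = 0.f;
  for(int i = 0; i < n; i += 2)
  {
    acc0 += x[i];
    acc1 += x[i + 1];
  }
  return acc0 + acc1;
}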
@@ -296,7 +309,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
Size = Derived::SizeAtCompileTime,
VectorizedSize = (Size / PacketSize) * PacketSize
};
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
{
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));


@@ -122,9 +122,13 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
return m_matrix.template packet<LoadMode>(actual_row, actual_col);
}
const typename internal::remove_all<typename MatrixType::Nested>::type& nestedExpression() const
{
return m_matrix;
}
protected:
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
};


@@ -183,8 +183,14 @@ template<typename MatrixType, int Direction> class Reverse
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
}
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
return m_matrix;
}
protected:
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
};
/** \returns an expression of the reverse of *this.


@@ -101,10 +101,25 @@ class Select : internal::no_assignment_operator,
return m_else.coeff(i);
}
const ConditionMatrixType& conditionMatrix() const
{
return m_condition;
}
const ThenMatrixType& thenMatrix() const
{
return m_then;
}
const ElseMatrixType& elseMatrix() const
{
return m_else;
}
protected:
const typename ConditionMatrixType::Nested m_condition;
const typename ThenMatrixType::Nested m_then;
const typename ElseMatrixType::Nested m_else;
typename ConditionMatrixType::Nested m_condition;
typename ThenMatrixType::Nested m_then;
typename ElseMatrixType::Nested m_else;
};


@@ -82,7 +82,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
};
typedef typename MatrixType::PlainObject PlainObject;
inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{}
inline Index rows() const { return m_matrix.rows(); }
@@ -199,7 +199,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
#endif
protected:
const MatrixTypeNested m_matrix;
MatrixTypeNested m_matrix;
};
@@ -222,7 +222,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -236,7 +236,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
{
inline static void run(Derived1 &, const Derived2 &) {}
static inline void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
@@ -247,7 +247,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -261,14 +261,14 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
{
inline static void run(Derived1 &, const Derived2 &) {}
static inline void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
@@ -285,7 +285,7 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dyn
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
{
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
typedef typename Derived1::Index Index;
for(Index i = 0; i < dst.rows(); ++i)


@@ -163,6 +163,16 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
return Base::operator=(rhs);
}
Lhs& expression() const
{
return m_matrix;
}
const BinaryOp& functor() const
{
return m_functor;
}
protected:
Lhs& m_matrix;
const BinaryOp& m_functor;


@@ -100,7 +100,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
static void run(const Lhs& lhs, Rhs& rhs)
{
const ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
@@ -177,10 +177,8 @@ template<int Side, typename OtherDerived>
void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
{
OtherDerived& other = _other.const_cast_derived();
eigen_assert(cols() == rows());
eigen_assert( (Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols()) );
eigen_assert(!(Mode & ZeroDiag));
eigen_assert((Mode & (Upper|Lower)) != 0);
eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) );
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime };
typedef typename internal::conditional<copy,
@@ -255,7 +253,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
protected:
const TriangularType& m_triangularMatrix;
const typename Rhs::Nested m_rhs;
typename Rhs::Nested m_rhs;
};
} // namespace internal


@@ -58,9 +58,9 @@ MatrixBase<Derived>::stableNorm() const
{
using std::min;
const Index blockSize = 4096;
RealScalar scale = 0;
RealScalar invScale = 1;
RealScalar ssq = 0; // sum of squares
RealScalar scale(0);
RealScalar invScale(1);
RealScalar ssq(0); // sum of squares
enum {
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
};
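The direct initializations above are part of stableNorm(), which maintains a running scale so that squaring never overflows or underflows. A sketch of the classic scaled sum-of-squares recurrence (a close variant of the scale/invScale/ssq bookkeeping above):

#include <cmath>

// Squaring x[i]/scale instead of x[i] keeps every intermediate value
// near 1, avoiding overflow for large entries and underflow for tiny ones.
double stable_norm_sketch(const double* x, int n)
{
  double scale = 0, ssq = 1;
  for(int i = 0; i < n; ++i)
  {
    double ax = std::abs(x[i]);
    if(ax == 0) continue;
    if(ax > scale)
    {
      ssq = 1 + ssq * (scale / ax) * (scale / ax); // rescale previous sum
      scale = ax;
    }
    else
      ssq += (ax / scale) * (ax / scale);
  }
  return scale * std::sqrt(ssq);
}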


@@ -52,6 +52,15 @@ template<typename ExpressionType> class SwapWrapper
inline Index cols() const { return m_expression.cols(); }
inline Index outerStride() const { return m_expression.outerStride(); }
inline Index innerStride() const { return m_expression.innerStride(); }
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
inline const Scalar* data() const { return m_expression.data(); }
inline Scalar& coeffRef(Index row, Index col)
{
@@ -119,6 +128,8 @@ template<typename ExpressionType> class SwapWrapper
_other.template writePacket<LoadMode>(index, tmp);
}
ExpressionType& expression() const { return m_expression; }
protected:
ExpressionType& m_expression;
};
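The ScalarWithConstIfNotLvalue typedef added above propagates constness through data(): the mutable overload is only genuinely mutable when the wrapped expression is an lvalue. A sketch of the idiom, with std::conditional standing in for internal::conditional:

#include <type_traits>

// data() returns a pointer-to-const whenever the wrapped expression
// is not writable, even from the non-const overload.
template<bool IsLvalue>
struct wrapper_sketch
{
  typedef typename std::conditional<IsLvalue, double, const double>::type
      ScalarWithConstIfNotLvalue;
  ScalarWithConstIfNotLvalue* data()       { return m_ptr; }
  const double*               data() const { return m_ptr; }
  double* m_ptr;
};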


@@ -91,7 +91,7 @@ template<typename MatrixType> class Transpose
nestedExpression() { return m_matrix.const_cast_derived(); }
protected:
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
};
namespace internal {
@@ -152,12 +152,12 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
return derived().nestedExpression().coeffRef(index);
}
inline const CoeffReturnType coeff(Index row, Index col) const
inline CoeffReturnType coeff(Index row, Index col) const
{
return derived().nestedExpression().coeff(col, row);
}
inline const CoeffReturnType coeff(Index index) const
inline CoeffReturnType coeff(Index index) const
{
return derived().nestedExpression().coeff(index);
}


@@ -404,7 +404,7 @@ struct transposition_matrix_product_retval
protected:
const TranspositionType& m_transpositions;
const typename MatrixType::Nested m_matrix;
typename MatrixType::Nested m_matrix;
};
} // end namespace internal


@@ -273,11 +273,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
{ return m_matrix.conjugate(); }
/** \sa MatrixBase::adjoint() */
inline TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint()
{ return m_matrix.adjoint(); }
/** \sa MatrixBase::adjoint() const */
inline const TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
{ return m_matrix.adjoint(); }
/** \sa MatrixBase::transpose() */
@@ -288,11 +285,13 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
}
/** \sa MatrixBase::transpose() const */
inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
{ return m_matrix.transpose(); }
{
return m_matrix.transpose();
}
/** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived>
TriangularProduct<Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime>
TriangularProduct<Mode,true,MatrixType,false,OtherDerived, OtherDerived::IsVectorAtCompileTime>
operator*(const MatrixBase<OtherDerived>& rhs) const
{
return TriangularProduct
@@ -375,7 +374,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
template<typename OtherDerived>
void swap(MatrixBase<OtherDerived> const & other)
{
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
SwapWrapper<MatrixType> swapper(const_cast<MatrixType&>(m_matrix));
TriangularView<SwapWrapper<MatrixType>,Mode>(swapper).lazyAssign(other.derived());
}
Scalar determinant() const
@@ -433,7 +433,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
const MatrixTypeNested m_matrix;
MatrixTypeNested m_matrix;
};
/***************************************************************************
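The swap() rewrite above names the SwapWrapper in a local variable instead of building it inline in the TriangularView constructor call. One common reason for such a change, offered here as an assumption rather than something the diff states, is that a constructor parameter of non-const reference type cannot bind to a temporary. A hypothetical two-class sketch:

// Wrapper and View are illustrative stand-ins for SwapWrapper and TriangularView.
struct Wrapper { explicit Wrapper(int& x) : ref(x) {} int& ref; };
struct View    { explicit View(Wrapper& w) : wrap(w) {} Wrapper& wrap; };

void swap_sketch(int& storage)
{
  // View v(Wrapper(storage));  // ill-formed: a temporary cannot bind to Wrapper&
  Wrapper swaper(storage);      // named lvalue, as in the patch above
  View v(swaper);               // OK
  v.wrap.ref = 0;               // writes through to 'storage'
}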
@@ -452,7 +452,7 @@ struct triangular_assignment_selector
typedef typename Derived1::Scalar Scalar;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -480,7 +480,7 @@ struct triangular_assignment_selector
template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
{
inline static void run(Derived1 &, const Derived2 &) {}
static inline void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
@@ -488,7 +488,7 @@ struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearO
{
typedef typename Derived1::Index Index;
typedef typename Derived1::Scalar Scalar;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
@@ -506,7 +506,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
@@ -524,7 +524,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
@@ -542,7 +542,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
@@ -560,7 +560,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
@@ -580,7 +580,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
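The hunks above consistently reorder 'inline static' to 'static inline', putting the storage-class specifier first as is conventional. The selector itself is an instance of compile-time loop unrolling through recursive partial specialization; a generic sketch of the pattern, with illustrative names:

template<typename Dst, typename Src, int Count>
struct assign_unroller {
  static inline void run(Dst& dst, const Src& src) {
    assign_unroller<Dst, Src, Count-1>::run(dst, src);  // handle earlier coeffs
    dst[Count-1] = src[Count-1];                        // then copy this one
  }
};

template<typename Dst, typename Src>
struct assign_unroller<Dst, Src, 0> {
  static inline void run(Dst&, const Src&) {}  // recursion anchor
};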


@@ -110,7 +110,7 @@ class PartialReduxExpr : internal::no_assignment_operator,
}
protected:
const MatrixTypeNested m_matrix;
MatrixTypeNested m_matrix;
const MemberOp m_functor;
};
@@ -237,7 +237,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
typename ExtendedType<OtherDerived>::Type
extendedTo(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1),
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1),
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
return typename ExtendedType<OtherDerived>::Type
(other.derived(),
Direction==Vertical ? 1 : m_matrix.rows(),
@@ -418,10 +421,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
//eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
for(Index j=0; j<subVectors(); ++j)
subVector(j) = other;
return const_cast<ExpressionType&>(m_matrix);
return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
}
/** Adds the vector \a other to each subvector of \c *this */
@@ -429,9 +431,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
for(Index j=0; j<subVectors(); ++j)
subVector(j) += other.derived();
return const_cast<ExpressionType&>(m_matrix);
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
}
/** Subtracts the vector \a other from each subvector of \c *this */
@@ -439,8 +440,29 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
for(Index j=0; j<subVectors(); ++j)
subVector(j) -= other.derived();
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
}
/** Multiplies each subvector of \c *this by the vector \a other */
template<typename OtherDerived>
ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
m_matrix *= extendedTo(other.derived());
return const_cast<ExpressionType&>(m_matrix);
}
/** Divides each subvector of \c *this by the vector \a other */
template<typename OtherDerived>
ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
m_matrix /= extendedTo(other.derived());
return const_cast<ExpressionType&>(m_matrix);
}
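A short usage sketch of the in-place broadcasting operators added above; note that operator*= and operator/= are guarded by EIGEN_STATIC_ASSERT_ARRAYXPR, so they require array (not matrix) expressions:

#include <Eigen/Dense>

void broadcast_inplace_demo()
{
  Eigen::ArrayXXf a = Eigen::ArrayXXf::Random(4, 3);
  Eigen::ArrayXf  v = Eigen::ArrayXf::LinSpaced(4, 1.f, 4.f);

  a.colwise() += v;  // add v to every column
  a.colwise() *= v;  // coefficient-wise scale of every column, arrays only
  a.colwise() /= v;  // coefficient-wise division of every column, arrays only
}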
@@ -451,7 +473,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
const typename ExtendedType<OtherDerived>::Type>
operator+(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix + extendedTo(other.derived());
}
@@ -462,10 +485,39 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
const typename ExtendedType<OtherDerived>::Type>
operator-(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix - extendedTo(other.derived());
}
/** Returns the expression where each subvector is the product of the vector \a other
* by the corresponding subvector of \c *this */
template<typename OtherDerived> EIGEN_STRONG_INLINE
CwiseBinaryOp<internal::scalar_product_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
operator*(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix * extendedTo(other.derived());
}
/** Returns the expression where each subvector is the quotient of the corresponding
* subvector of \c *this by the vector \a other */
template<typename OtherDerived>
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
operator/(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix / extendedTo(other.derived());
}
/////////// Geometry module ///////////
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
@@ -509,7 +561,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Example: \include MatrixBase_colwise.cpp
* Output: \verbinclude MatrixBase_colwise.out
*
* \sa rowwise(), class VectorwiseOp
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline const typename DenseBase<Derived>::ConstColwiseReturnType
@@ -520,7 +572,7 @@ DenseBase<Derived>::colwise() const
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
*
* \sa rowwise(), class VectorwiseOp
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline typename DenseBase<Derived>::ColwiseReturnType
@@ -534,7 +586,7 @@ DenseBase<Derived>::colwise()
* Example: \include MatrixBase_rowwise.cpp
* Output: \verbinclude MatrixBase_rowwise.out
*
* \sa colwise(), class VectorwiseOp
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline const typename DenseBase<Derived>::ConstRowwiseReturnType
@@ -545,7 +597,7 @@ DenseBase<Derived>::rowwise() const
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
*
* \sa colwise(), class VectorwiseOp
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline typename DenseBase<Derived>::RowwiseReturnType
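For reference, a small usage sketch of the partial-reduction wrappers whose documentation is extended above, in the spirit of the tutorial page the \sa links now point to:

#include <Eigen/Dense>
#include <iostream>

void partial_reduction_demo()
{
  Eigen::MatrixXf m(2, 3);
  m << 1, 2, 3,
       4, 5, 6;
  std::cout << m.colwise().sum() << "\n";      // row vector: 5 7 9
  std::cout << m.rowwise().maxCoeff() << "\n"; // column vector: 3 6
}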


@@ -35,7 +35,7 @@ struct visitor_impl
row = (UnrollCount-1) % Derived::RowsAtCompileTime
};
inline static void run(const Derived &mat, Visitor& visitor)
static inline void run(const Derived &mat, Visitor& visitor)
{
visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
visitor(mat.coeff(row, col), row, col);
@@ -45,7 +45,7 @@ struct visitor_impl
template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, 1>
{
inline static void run(const Derived &mat, Visitor& visitor)
static inline void run(const Derived &mat, Visitor& visitor)
{
return visitor.init(mat.coeff(0, 0), 0, 0);
}
@@ -55,7 +55,7 @@ template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, Dynamic>
{
typedef typename Derived::Index Index;
inline static void run(const Derived& mat, Visitor& visitor)
static inline void run(const Derived& mat, Visitor& visitor)
{
visitor.init(mat.coeff(0,0), 0, 0);
for(Index i = 1; i < mat.rows(); ++i)
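The visitor_impl specializations above drive any visitor object exposing the init()/operator() protocol. A sketch of a conforming custom visitor (names are illustrative):

#include <Eigen/Dense>
#include <algorithm>
#include <cmath>

struct MaxAbsVisitor {
  typedef Eigen::MatrixXf::Index Index;
  float value;
  void init(float v, Index, Index) { value = std::abs(v); }  // coeff (0,0)
  void operator()(float v, Index, Index) {                   // all other coeffs
    value = std::max(value, std::abs(v));
  }
};

void visit_demo(const Eigen::MatrixXf& m)
{
  MaxAbsVisitor vis;
  m.visit(vis);  // visits every coefficient exactly once
}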


@@ -168,7 +168,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
{
if (Offset==1)
{
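For readers unfamiliar with palign_impl: it shifts the concatenation of two packets left by Offset lanes. A scalar reference model for a 4-lane packet (0 <= Offset < 4), not the vectorized implementation:

template<int Offset>
void palign_model(float first[4], const float second[4])
{
  float tmp[8];
  for (int i = 0; i < 4; ++i) { tmp[i] = first[i]; tmp[4 + i] = second[i]; }
  for (int i = 0; i < 4; ++i) first[i] = tmp[Offset + i];  // shifted window
}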


@@ -487,7 +487,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
{
if (Offset!=0)
first = vec_sld(first, second, Offset*4);
@@ -497,7 +497,7 @@ struct palign_impl<Offset,Packet4f>
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
{
if (Offset!=0)
first = vec_sld(first, second, Offset*4);


@@ -102,7 +102,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
Packet2cf res;
#if EIGEN_GNUC_AT_MOST(4,2)
// workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)&from);
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
#else
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
#endif
@@ -151,7 +151,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
{
if (Offset==1)
{
@@ -350,7 +350,7 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
template<int Offset>
struct palign_impl<Offset,Packet1cd>
{
EIGEN_STRONG_INLINE static void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
{
// FIXME are we sure we never have to align a Packet1cd?
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...


@@ -110,9 +110,18 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
#if defined(_MSC_VER) && (_MSC_VER==1500)
// Workaround MSVC 9 internal compiler error.
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
// TODO: check whether a better fix exists, like adding a pset0() function. (it crashed on pset1(0)).
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
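The MSVC 9 workaround above only swaps spellings: the multi-argument set intrinsics splat the same value into every lane that the set1 forms do. A tiny SSE sanity sketch of the equivalence:

#include <xmmintrin.h>

inline bool splat_equivalent(float x)
{
  __m128 a = _mm_set1_ps(x);
  __m128 b = _mm_set_ps(x, x, x, x);  // the workaround's spelling
  return _mm_movemask_ps(_mm_cmpeq_ps(a, b)) == 0xF;  // all four lanes equal
}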
@@ -282,7 +291,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
{ return pset1<Packet2d>(from[0]); }
@@ -302,8 +311,8 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
_mm_storel_pd((to), from);
_mm_storeh_pd((to+1), from);
}
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castsi128_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castsi128_pd(from)); }
// some compilers might be tempted to perform multiple moves instead of using a vector path.
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
@@ -541,7 +550,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
{
if (Offset!=0)
first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
@@ -551,7 +560,7 @@ struct palign_impl<Offset,Packet4f>
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
{
if (Offset!=0)
first = _mm_alignr_epi8(second,first, Offset*4);
@@ -561,7 +570,7 @@ struct palign_impl<Offset,Packet4i>
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
{
if (Offset==1)
first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
@@ -572,7 +581,7 @@ struct palign_impl<Offset,Packet2d>
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
{
if (Offset==1)
{
@@ -595,7 +604,7 @@ struct palign_impl<Offset,Packet4f>
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
{
if (Offset==1)
{
@@ -618,7 +627,7 @@ struct palign_impl<Offset,Packet4i>
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
{
if (Offset==1)
{


@@ -224,8 +224,8 @@ class CoeffBasedProduct
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
mutable PlainObject m_result;
};
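add_const_on_value_type, used above for the nested members, attaches const to the referenced or pointed-to type, where it is actually meaningful; a top-level const on a reference type is simply dropped. A sketch of the essential cases (Eigen's real metafunction has a few more specializations):

template<typename T> struct add_const_on_value_type_sketch
{ typedef const T type; };                 // value:     T  -> const T
template<typename T> struct add_const_on_value_type_sketch<T&>
{ typedef const T& type; };                // reference: T& -> const T&
template<typename T> struct add_const_on_value_type_sketch<T*>
{ typedef const T* type; };                // pointer:   T* -> const T*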
@@ -252,7 +252,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
@@ -263,7 +263,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
@@ -273,7 +273,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
{
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
@@ -291,7 +291,7 @@ struct product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
@@ -302,7 +302,7 @@ template<typename Lhs, typename Rhs, typename Packet>
struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
}
@@ -314,7 +314,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
Packet pres;
product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
@@ -327,7 +327,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R
struct product_coeff_vectorized_dyn_selector
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -339,7 +339,7 @@ template<typename Lhs, typename Rhs, int RhsCols>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -349,7 +349,7 @@ template<typename Lhs, typename Rhs, int LhsRows>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
}
@@ -359,7 +359,7 @@ template<typename Lhs, typename Rhs>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs).sum();
}
@@ -369,7 +369,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
}
@@ -383,7 +383,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
@@ -394,7 +394,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
@@ -405,7 +405,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
@@ -415,7 +415,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
@@ -425,7 +425,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
@@ -438,7 +438,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));


@@ -30,19 +30,18 @@ namespace internal {
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
class gebp_traits;
/** \internal \returns b if a<=0, and returns a otherwise. */
inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
{
return a<=0 ? b : a;
}
/** \internal */
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
{
static std::ptrdiff_t m_l1CacheSize = 0;
static std::ptrdiff_t m_l2CacheSize = 0;
if(m_l1CacheSize==0)
{
m_l1CacheSize = queryL1CacheSize();
m_l2CacheSize = queryTopLevelCacheSize();
if(m_l1CacheSize<=0) m_l1CacheSize = 8 * 1024;
if(m_l2CacheSize<=0) m_l2CacheSize = 1 * 1024 * 1024;
}
static std::ptrdiff_t m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
static std::ptrdiff_t m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
if(action==SetAction)
{
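The rewrite above folds the lazy if-guarded initialization into the initializers of the function-local statics, which run exactly once, with the helper supplying a fallback when the cache query fails. A sketch of the pattern (queryCacheSize is a hypothetical stand-in for Eigen's query functions):

#include <cstddef>

inline std::ptrdiff_t fallback_if_nonpositive(std::ptrdiff_t a, std::ptrdiff_t b)
{
  return a <= 0 ? b : a;  // b if a<=0, a otherwise
}

inline std::ptrdiff_t cachedL1Size(std::ptrdiff_t (*queryCacheSize)())
{
  // initialized on first call only, then reused
  static std::ptrdiff_t size =
      fallback_if_nonpositive(queryCacheSize(), 8 * 1024);
  return size;
}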
@@ -536,7 +535,7 @@ struct gebp_kernel
ResPacketSize = Traits::ResPacketSize
};
EIGEN_FLATTEN_ATTRIB
EIGEN_DONT_INLINE EIGEN_FLATTEN_ATTRIB
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
{
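The hunks below rename the register variable B0 to B_0 throughout the kernel. A plausible motivation, stated here as an assumption since the diff does not say, is that B0 is a preprocessor macro on some systems (e.g. the baud-rate constant in POSIX <termios.h>), which would break any local variable of that name:

// Illustration only; <termios.h> is POSIX-specific.
#include <termios.h>
#ifdef B0
// 'RhsPacket B0;' would expand to 'RhsPacket 0000000;' and fail to parse;
// 'RhsPacket B_0;' is unaffected.
#endif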
@@ -598,64 +597,64 @@ struct gebp_kernel
if(nr==2)
{
LhsPacket A0, A1;
RhsPacket B0;
RhsPacket B_0;
RhsPacket T0;
EIGEN_ASM_COMMENT("mybegin2");
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[1*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[1*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadLhs(&blA[2*LhsProgress], A0);
traits.loadLhs(&blA[3*LhsProgress], A1);
traits.loadRhs(&blB[2*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[3*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadRhs(&blB[2*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[3*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadLhs(&blA[4*LhsProgress], A0);
traits.loadLhs(&blA[5*LhsProgress], A1);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[5*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[5*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadLhs(&blA[6*LhsProgress], A0);
traits.loadLhs(&blA[7*LhsProgress], A1);
traits.loadRhs(&blB[6*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[7*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadRhs(&blB[6*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[7*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
EIGEN_ASM_COMMENT("myend");
}
else
{
EIGEN_ASM_COMMENT("mybegin4");
LhsPacket A0, A1;
RhsPacket B0, B1, B2, B3;
RhsPacket B_0, B1, B2, B3;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B0,C0,T0);
traits.madd(A0,B_0,C0,T0);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.madd(A1,B0,C4,B0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.loadRhs(&blB[5*RhsProgress], B1);
@@ -667,9 +666,9 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.madd(A1,B3,C7,B3);
traits.loadLhs(&blA[3*LhsProgress], A1);
traits.loadRhs(&blB[7*RhsProgress], B3);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[8*RhsProgress], B0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[8*RhsProgress], B_0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.loadRhs(&blB[9*RhsProgress], B1);
@@ -682,9 +681,9 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.loadLhs(&blA[5*LhsProgress], A1);
traits.loadRhs(&blB[11*RhsProgress], B3);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[12*RhsProgress], B0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[12*RhsProgress], B_0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.loadRhs(&blB[13*RhsProgress], B1);
@@ -696,8 +695,8 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.madd(A1,B3,C7,B3);
traits.loadLhs(&blA[7*LhsProgress], A1);
traits.loadRhs(&blB[15*RhsProgress], B3);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.madd(A0,B2,C2,T0);
@@ -715,32 +714,32 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsPacket A0, A1;
RhsPacket B0;
RhsPacket B_0;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[1*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[1*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
}
else
{
LhsPacket A0, A1;
RhsPacket B0, B1, B2, B3;
RhsPacket B_0, B1, B2, B3;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B0,C0,T0);
traits.madd(A0,B_0,C0,T0);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.madd(A1,B0,C4,B0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
@@ -827,42 +826,42 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsPacket A0;
RhsPacket B0, B1;
RhsPacket B_0, B1;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[2*RhsProgress], B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[2*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadLhs(&blA[1*LhsProgress], A0);
traits.loadRhs(&blB[3*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadLhs(&blA[2*LhsProgress], A0);
traits.loadRhs(&blB[5*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[6*RhsProgress], B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[6*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadLhs(&blA[3*LhsProgress], A0);
traits.loadRhs(&blB[7*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B1,C1,B1);
}
else
{
LhsPacket A0;
RhsPacket B0, B1, B2, B3;
RhsPacket B_0, B1, B2, B3;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadRhs(&blB[5*RhsProgress], B1);
traits.madd(A0,B2,C2,B2);
@@ -870,8 +869,8 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.madd(A0,B3,C3,B3);
traits.loadLhs(&blA[1*LhsProgress], A0);
traits.loadRhs(&blB[7*RhsProgress], B3);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[8*RhsProgress], B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[8*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadRhs(&blB[9*RhsProgress], B1);
traits.madd(A0,B2,C2,B2);
@@ -880,8 +879,8 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.loadLhs(&blA[2*LhsProgress], A0);
traits.loadRhs(&blB[11*RhsProgress], B3);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[12*RhsProgress], B0);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[12*RhsProgress], B_0);
traits.madd(A0,B1,C1,B1);
traits.loadRhs(&blB[13*RhsProgress], B1);
traits.madd(A0,B2,C2,B2);
@@ -890,7 +889,7 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.loadLhs(&blA[3*LhsProgress], A0);
traits.loadRhs(&blB[15*RhsProgress], B3);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B1,C1,B1);
traits.madd(A0,B2,C2,B2);
traits.madd(A0,B3,C3,B3);
@@ -905,26 +904,26 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsPacket A0;
RhsPacket B0, B1;
RhsPacket B_0, B1;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B1,C1,B1);
}
else
{
LhsPacket A0;
RhsPacket B0, B1, B2, B3;
RhsPacket B_0, B1, B2, B3;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B1,C1,B1);
traits.madd(A0,B2,C2,B2);
traits.madd(A0,B3,C3,B3);
@@ -971,26 +970,26 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsScalar A0;
RhsScalar B0, B1;
RhsScalar B_0, B1;
A0 = blA[k];
B0 = blB[0];
B_0 = blB[0];
B1 = blB[1];
MADD(cj,A0,B0,C0,B0);
MADD(cj,A0,B_0,C0,B_0);
MADD(cj,A0,B1,C1,B1);
}
else
{
LhsScalar A0;
RhsScalar B0, B1, B2, B3;
RhsScalar B_0, B1, B2, B3;
A0 = blA[k];
B0 = blB[0];
B_0 = blB[0];
B1 = blB[1];
B2 = blB[2];
B3 = blB[3];
MADD(cj,A0,B0,C0,B0);
MADD(cj,A0,B_0,C0,B_0);
MADD(cj,A0,B1,C1,B1);
MADD(cj,A0,B2,C2,B2);
MADD(cj,A0,B3,C3,B3);
@@ -1027,14 +1026,14 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index k=0; k<depth; k++)
{
LhsPacket A0, A1;
RhsPacket B0;
RhsPacket B_0;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
blB += RhsProgress;
blA += 2*LhsProgress;
@@ -1066,10 +1065,10 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index k=0; k<depth; k++)
{
LhsPacket A0;
RhsPacket B0;
RhsPacket B_0;
traits.loadLhs(blA, A0);
traits.loadRhs(blB, B0);
traits.madd(A0, B0, C0, B0);
traits.loadRhs(blB, B_0);
traits.madd(A0, B_0, C0, B_0);
blB += RhsProgress;
blA += LhsProgress;
}
@@ -1091,8 +1090,8 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index k=0; k<depth; k++)
{
LhsScalar A0 = blA[k];
RhsScalar B0 = blB[k];
MADD(cj, A0, B0, C0, B0);
RhsScalar B_0 = blB[k];
MADD(cj, A0, B_0, C0, B_0);
}
res[(j2+0)*resStride + i] += alpha*C0;
}
@@ -1103,7 +1102,7 @@ EIGEN_ASM_COMMENT("mybegin4");
#undef CJMADD
// pack a block of the lhs
// The travesal is as follow (mr==4):
// The traversal is as follows (mr==4):
// 0 4 8 12 ...
// 1 5 9 13 ...
// 2 6 10 14 ...
@@ -1119,11 +1118,15 @@ EIGEN_ASM_COMMENT("mybegin4");
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct gemm_pack_lhs
{
void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
Index stride=0, Index offset=0)
{
// enum { PacketSize = packet_traits<Scalar>::size };
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
@@ -1131,9 +1134,44 @@ struct gemm_pack_lhs
for(Index i=0; i<peeled_mc; i+=Pack1)
{
if(PanelMode) count += Pack1 * offset;
for(Index k=0; k<depth; k++)
for(Index w=0; w<Pack1; w++)
blockA[count++] = cj(lhs(i+w, k));
if(StorageOrder==ColMajor)
{
for(Index k=0; k<depth; k++)
{
Packet A, B, C, D;
if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
}
}
else
{
for(Index k=0; k<depth; k++)
{
// TODO add a vectorized transpose here
Index w=0;
for(; w<Pack1-3; w+=4)
{
Scalar a(cj(lhs(i+w+0, k))),
b(cj(lhs(i+w+1, k))),
c(cj(lhs(i+w+2, k))),
d(cj(lhs(i+w+3, k)));
blockA[count++] = a;
blockA[count++] = b;
blockA[count++] = c;
blockA[count++] = d;
}
if(Pack1%4)
for(;w<Pack1;++w)
blockA[count++] = cj(lhs(i+w, k));
}
}
if(PanelMode) count += Pack1 * (stride-offset-depth);
}
if(rows-peeled_mc>=Pack2)
@@ -1167,9 +1205,10 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
@@ -1214,9 +1253,10 @@ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
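Both pack operators above gain EIGEN_DONT_INLINE, keeping these large bodies out of every call site. A sketch of how such an attribute macro is typically defined (illustrative; consult Eigen's Macros.h for the real definition):

#if defined(_MSC_VER)
#define DONT_INLINE_SKETCH __declspec(noinline)
#else
#define DONT_INLINE_SKETCH __attribute__((noinline))
#endif

DONT_INLINE_SKETCH void heavy_pack_kernel();  // compiled once, never inlined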


@@ -412,8 +412,8 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);


@@ -42,14 +42,14 @@ struct tribb_kernel;
template <typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder, int UpLo>
int ResStorageOrder, int UpLo, int Version = Specialized>
struct general_matrix_matrix_triangular_product;
// as usual if the result is row major => we transpose the product
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo>
{
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
@@ -63,8 +63,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
};
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo>
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
@@ -201,13 +201,13 @@ TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs());
typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs());
typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());


@@ -0,0 +1,142 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Level 3 BLAS SYRK/HERK implementation.
********************************************************************************
*/
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
namespace internal {
template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo>
struct general_matrix_matrix_rankupdate :
general_matrix_matrix_triangular_product<
Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
// try to go to BLAS specialization
#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
{ \
if (lhs==rhs) { \
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
} else { \
general_matrix_matrix_triangular_product<Index, \
Scalar, LhsStorageOrder, ConjugateLhs, \
Scalar, RhsStorageOrder, ConjugateRhs, \
ColMajor, UpLo, BuiltIn> \
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
} \
} \
};
EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
// SYRK for float/double
#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \
IsLower = (UpLo&Lower) == Lower, \
LowUp = IsLower ? Lower : Upper, \
conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
}; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
{ \
/* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
\
MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
MKLTYPE alpha_, beta_; \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
} \
};
// HERK for complex data
#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \
IsLower = (UpLo&Lower) == Lower, \
LowUp = IsLower ? Lower : Upper, \
conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
}; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
{ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
\
MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
RTYPE alpha_, beta_; \
const EIGTYPE* a_ptr; \
\
/* Set alpha_ & beta_ */ \
/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
alpha_ = alpha.real(); \
beta_ = 1.0; \
/* Copy with conjugation in some cases*/ \
MatrixType a; \
if (conjA) { \
Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
a = mapA.conjugate(); \
lda = a.outerStride(); \
a_ptr = a.data(); \
} else a_ptr=lhs; \
MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
} \
};
EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk)
//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk)
} // end namespace internal
#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
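The BuiltIn tag used above is what lets the MKL specialization fall back to Eigen's generic kernel without infinite recursion: the default Version argument selects the (externally specializable) entry point, while BuiltIn names the generic implementation explicitly. A minimal sketch of the dispatch idiom:

enum { Specialized, BuiltIn };

template<typename Scalar, int Version = Specialized>
struct product_impl {
  static void run() { /* generic kernel */ }
};

// external-BLAS specialization; forwards to the generic code
// when the fast path does not apply
template<>
struct product_impl<double, Specialized> {
  static void run() { product_impl<double, BuiltIn>::run(); }
};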


@@ -0,0 +1,114 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* General matrix-matrix product functionality based on ?GEMM.
********************************************************************************
*/
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
namespace internal {
/**********************************************************************
* This file implements general matrix-matrix multiplication using the
* BLAS ?gemm routine, via partial specialization of the
* general_matrix_matrix_product::run(..) method, for the float, double,
* std::complex<float> and std::complex<double> types
**********************************************************************/
// gemm specialization
#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
template< \
typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
static void run(Index rows, Index cols, Index depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha, \
level3_blocking<EIGTYPE, EIGTYPE>& blocking, \
GemmParallelInfo<Index>* info = 0) \
{ \
using std::conj; \
\
char transa, transb; \
MKL_INT m, n, k, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX a_tmp, b_tmp; \
EIGTYPE myone(1);\
\
/* Set transpose options */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\
/* Set m, n, k */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
k = (MKL_INT)depth; \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Set lda, ldb, ldc */ \
lda = (MKL_INT)lhsStride; \
ldb = (MKL_INT)rhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Set a, b, c */ \
if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else a = _lhs; \
\
if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} else b = _rhs; \
\
MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
}};
GEMM_SPECIALIZATION(double, d, double, d)
GEMM_SPECIALIZATION(float, f, float, s)
GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c)
} // end namespace internal
#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
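
The file above reroutes four scalar types to MKL by partially specializing the general_matrix_matrix_product class template, while every other scalar type keeps the generic kernel. A minimal, self-contained sketch of that dispatch pattern follows; the names (general_product_sketch, my_dgemm_stub) are hypothetical stand-ins, not Eigen's or MKL's:

#include <cstdio>

// stand-in for an external BLAS-style gemm entry point (hypothetical)
void my_dgemm_stub(int m, int n, int k, double alpha)
{ std::printf("dgemm-like path: %dx%dx%d, alpha=%g\n", m, n, k, alpha); }

// primary template: the generic kernel every scalar type falls back to
template<typename Scalar, int StorageOrder>
struct general_product_sketch
{
  static void run(int m, int n, int k, Scalar alpha)
  { (void)alpha; std::printf("generic path: %dx%dx%d\n", m, n, k); }
};

// partial specialization: only the scalar type is pinned down, the
// storage order stays generic, exactly like GEMM_SPECIALIZATION above
template<int StorageOrder>
struct general_product_sketch<double, StorageOrder>
{
  static void run(int m, int n, int k, double alpha)
  { my_dgemm_stub(m, n, k, alpha); }
};

int main()
{
  general_product_sketch<float, 0>::run(2, 3, 4, 1.f);  // generic kernel
  general_product_sketch<double, 0>::run(2, 3, 4, 1.0); // BLAS-style path
  return 0;
}

Only the double instantiation reaches the stub; the float one never sees it, which is how the real header stays invisible to unsupported scalar types.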


@@ -40,8 +40,8 @@ namespace internal {
* |cplx |real |cplx | invalid, the caller has to do tmp := A * B; C += alpha*tmp
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
@@ -99,7 +99,7 @@ EIGEN_DONT_INLINE static void run(
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type.
Index alignedStart = first_aligned(res,size);
Index alignedStart = internal::first_aligned(res,size);
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@@ -109,7 +109,7 @@ EIGEN_DONT_INLINE static void run(
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = first_aligned(lhs,size);
const Index lhsAlignmentOffset = internal::first_aligned(lhs,size);
// find how many columns do we have to skip to be aligned with the result (if possible)
Index skipColumns = 0;
@@ -296,8 +296,8 @@ EIGEN_DONT_INLINE static void run(
* - alpha is always a complex (or converted to a complex)
* - no vectorization
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
@@ -351,7 +351,7 @@ EIGEN_DONT_INLINE static void run(
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type
// if that's not the case then vectorization is discarded, see below.
Index alignedStart = first_aligned(rhs, depth);
Index alignedStart = internal::first_aligned(rhs, depth);
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@@ -361,7 +361,7 @@ EIGEN_DONT_INLINE static void run(
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = first_aligned(lhs,depth);
const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth);
// find how many rows do we have to skip to be aligned with rhs (if possible)
Index skipRows = 0;


@@ -0,0 +1,127 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* General matrix-vector product functionality based on ?GEMV.
********************************************************************************
*/
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
namespace internal {
/**********************************************************************
* This file implements general matrix-vector multiplication using BLAS
* gemv function via partial specialization of
* general_matrix_vector_product::run(..) method for float, double,
* std::complex<float> and std::complex<double> types
**********************************************************************/
// gemv specialization
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product_gemv :
general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {};
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
Scalar* res, Index resIncr, Scalar alpha) \
{ \
if (ConjugateLhs) { \
general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
} else { \
general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
} \
} \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
Scalar* res, Index resIncr, Scalar alpha) \
{ \
general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
} \
};
EIGEN_MKL_GEMV_SPECIALIZE(double)
EIGEN_MKL_GEMV_SPECIALIZE(float)
EIGEN_MKL_GEMV_SPECIALIZE(dcomplex)
EIGEN_MKL_GEMV_SPECIALIZE(scomplex)
#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
\
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsIncr, \
EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
{ \
MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \
MKLTYPE alpha_, beta_; \
const EIGTYPE *x_ptr, myone(1); \
char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
if (LhsStorageOrder==RowMajor) { \
m=cols; \
n=rows; \
}\
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
GEMVVector x_tmp; \
if (ConjugateRhs) { \
Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \
incx=1; \
} else x_ptr=rhs; \
MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
}\
};
EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d)
EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s)
EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z)
EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c)
} // end namespace internal
#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
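
This header can intercept the default instantiation because a Version tag was appended to general_matrix_vector_product: callers implicitly get Version=Specialized, and the interception re-enters the generic kernel through the BuiltIn tag whenever the fast path cannot express the request (above, a conjugated lhs in the column-major case). A toy sketch of the mechanism, all names made up:

#include <cstdio>

enum { BuiltIn = 0, Specialized = 1 };

// primary template: the generic kernel, reachable either implicitly or
// explicitly via Version=BuiltIn
template<typename Scalar, int Version = Specialized>
struct gemv_sketch
{
  static void run(bool conj_lhs)
  { std::printf("built-in kernel (conj_lhs=%d)\n", int(conj_lhs)); }
};

// backend interception for double: handles what it can, falls back for
// the rest without recursing into itself
template<>
struct gemv_sketch<double, Specialized>
{
  static void run(bool conj_lhs)
  {
    if (conj_lhs) // the fast path cannot conjugate the lhs: fall back
      gemv_sketch<double, BuiltIn>::run(conj_lhs);
    else
      std::printf("fast BLAS-style kernel\n");
  }
};

int main()
{
  gemv_sketch<double>::run(false); // intercepted, fast path
  gemv_sketch<double>::run(true);  // intercepted, falls back to built-in
  gemv_sketch<float>::run(false);  // no interception registered: built-in
  return 0;
}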


@@ -85,7 +85,9 @@ template<typename Index> struct GemmParallelInfo
template<bool Condition, typename Functor, typename Index>
void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
{
#ifndef EIGEN_HAS_OPENMP
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
// FIXME the transpose variable is only needed to properly split
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole


@@ -400,8 +400,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);


@@ -0,0 +1,291 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
namespace internal {
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
\
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \
{ \
char side='L', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX b_tmp; \
EIGTYPE myone(1);\
\
/* Set transpose options */ \
/* Set m, n, k */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Set lda, ldb, ldc */ \
lda = (MKL_INT)lhsStride; \
ldb = (MKL_INT)rhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Set a, b, c */ \
if (LhsStorageOrder==RowMajor) uplo='U'; \
a = _lhs; \
\
if (RhsStorageOrder==RowMajor) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} else b = _rhs; \
\
MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
\
} \
};
#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \
{ \
char side='L', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
EIGTYPE myone(1); \
\
/* Set transpose options */ \
/* Set m, n, k */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Set lda, ldb, ldc */ \
lda = (MKL_INT)lhsStride; \
ldb = (MKL_INT)rhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Set a, b, c */ \
if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else a = _lhs; \
if (LhsStorageOrder==RowMajor) uplo='U'; \
\
if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
b = _rhs; } \
else { \
if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \
} else \
if (ConjugateRhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \
} else { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.transpose(); \
} \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} \
\
MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
\
} \
};
EIGEN_MKL_SYMM_L(double, double, d, d)
EIGEN_MKL_SYMM_L(float, float, f, s)
EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c)
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
\
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \
{ \
char side='R', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX b_tmp; \
EIGTYPE myone(1);\
\
/* Set m, n, k */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Set lda, ldb, ldc */ \
lda = (MKL_INT)rhsStride; \
ldb = (MKL_INT)lhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Set a, b, c */ \
if (RhsStorageOrder==RowMajor) uplo='U'; \
a = _rhs; \
\
if (LhsStorageOrder==RowMajor) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = lhs.adjoint(); \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} else b = _lhs; \
\
MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
\
} \
};
#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \
{ \
char side='R', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
EIGTYPE myone(1); \
\
/* Set m, n, k */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Set lda, ldb, ldc */ \
lda = (MKL_INT)rhsStride; \
ldb = (MKL_INT)lhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Set a, b, c */ \
if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
a_tmp = rhs.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else a = _rhs; \
if (RhsStorageOrder==RowMajor) uplo='U'; \
\
if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
b = _lhs; } \
else { \
if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
b_tmp = lhs.conjugate(); \
} else \
if (ConjugateLhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
b_tmp = lhs.adjoint(); \
} else { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
b_tmp = lhs.transpose(); \
} \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} \
\
MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
} \
};
EIGEN_MKL_SYMM_R(double, double, d, d)
EIGEN_MKL_SYMM_R(float, float, f, s)
EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c)
} // end namespace internal
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
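
Both the SYMM and HEMM macros flip uplo from 'L' to 'U' whenever the selfadjoint operand is stored row-major: the buffer of a row-major lower triangle, read column-major as BLAS reads it, holds the upper triangle of the transposed matrix (for the Hermitian case a conjugation remains, which the HEMM macro performs separately). A short stand-alone demonstration, illustrative only:

#include <cstdio>

int main()
{
  const int n = 3;
  // row-major 3x3 buffer with only the lower triangle filled in
  double a[n*n] = { 1, 0, 0,
                    4, 5, 0,
                    7, 8, 9 };
  // reinterpret the same buffer column-major, as a BLAS routine would
  std::printf("column-major view of the buffer:\n");
  for (int i = 0; i < n; ++i) {      // row of the column-major view
    for (int j = 0; j < n; ++j)      // column of the column-major view
      std::printf("%4.0f", a[j*n + i]);
    std::printf("\n");
  }
  // the output is upper triangular: hence uplo 'L' becomes 'U'
  return 0;
}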


@@ -32,8 +32,15 @@ namespace internal {
* the number of load/stores of the result by a factor 2 and to reduce
* the instruction dependency.
*/
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
static EIGEN_DONT_INLINE void product_selfadjoint_vector(
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
struct selfadjoint_matrix_vector_product;
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
struct selfadjoint_matrix_vector_product
{
static EIGEN_DONT_INLINE void run(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
@@ -85,14 +92,14 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
Scalar t1 = cjAlpha * rhs[j+1];
Packet ptmp1 = pset1<Packet>(t1);
Scalar t2 = 0;
Scalar t2(0);
Packet ptmp2 = pset1<Packet>(t2);
Scalar t3 = 0;
Scalar t3(0);
Packet ptmp3 = pset1<Packet>(t3);
size_t starti = FirstTriangular ? 0 : j+2;
size_t endi = FirstTriangular ? j : size;
size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti);
size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
@@ -148,7 +155,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
Scalar t1 = cjAlpha * rhs[j];
Scalar t2 = 0;
Scalar t2(0);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
res[j] += cjd.pmul(internal::real(A0[j]), t1);
for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
@@ -159,6 +166,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
res[j] += alpha * t2;
}
}
};
} // end namespace internal
@@ -193,8 +201,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
@@ -232,7 +240,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
}
internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
(
lhs.rows(), // size
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info


@@ -0,0 +1,110 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Selfadjoint matrix-vector product functionality based on ?SYMV/?HEMV.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
namespace internal {
/**********************************************************************
* This file implements selfadjoint matrix-vector multiplication using BLAS
**********************************************************************/
// symv/hemv specialization
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
struct selfadjoint_matrix_vector_product_symv :
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
enum {\
IsColMajor = StorageOrder==ColMajor \
}; \
if (IsColMajor == ConjugateLhs) {\
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
} else {\
selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
}\
} \
};
EIGEN_MKL_SYMV_SPECIALIZE(double)
EIGEN_MKL_SYMV_SPECIALIZE(float)
EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
static EIGEN_DONT_INLINE void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
{ \
enum {\
IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
IsLower = UpLo == Lower ? 1 : 0 \
}; \
MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \
MKLTYPE alpha_, beta_; \
const EIGTYPE *x_ptr, myone(1); \
char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
SYMVVector x_tmp; \
if (ConjugateRhs) { \
Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \
x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \
incx=1; \
} else x_ptr=_rhs; \
MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
}\
};
EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv)
EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv)
EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
} // end namespace internal
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
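
BLAS ?symv/?hemv offers no flag for conjugating the x operand, so the macro above gathers the strided rhs through a conjugating copy into a contiguous temporary and then hands the BLAS call incx = 1. The same pattern in plain C++, as a stand-alone sketch over illustrative data:

#include <complex>
#include <cstdio>
#include <vector>

typedef std::complex<double> cd;

int main()
{
  // strided source: the logical vector is every 2nd element of raw
  cd raw[6] = { cd(1,1), cd(0,0), cd(2,-3), cd(0,0), cd(4,5), cd(0,0) };
  int size = 3, incx = 2;

  std::vector<cd> x_tmp(size);          // contiguous temporary
  for (int i = 0; i < size; ++i)
    x_tmp[i] = std::conj(raw[i*incx]);  // conjugate while gathering
  incx = 1;                             // BLAS now sees a unit stride

  for (int i = 0; i < size; ++i)
    std::printf("(%g,%g) ", x_tmp[i].real(), x_tmp[i].imag());
  std::printf("\n");                    // prints (1,-1) (2,3) (4,-5)
  return 0;
}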


@@ -72,7 +72,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
typedef internal::blas_traits<OtherType> OtherBlasTraits;
typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived());
typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
@@ -105,12 +105,12 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
typedef internal::blas_traits<OtherType> OtherBlasTraits;
typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived());
typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
internal::general_matrix_matrix_triangular_product<Index,
Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,


@@ -76,12 +76,12 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
typedef typename internal::remove_all<ActualUType>::type _ActualUType;
const ActualUType actualU = UBlasTraits::extract(u.derived());
typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived());
typedef internal::blas_traits<DerivedV> VBlasTraits;
typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
typedef typename internal::remove_all<ActualVType>::type _ActualVType;
const ActualVType actualV = VBlasTraits::extract(v.derived());
typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived());
// If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and
// vice versa, and take the complex conjugate of all coefficients and vector entries.


@@ -58,16 +58,16 @@ template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder>
int ResStorageOrder, int Version = Specialized>
struct product_triangular_matrix_matrix;
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,RowMajor>
RhsStorageOrder,ConjugateRhs,RowMajor,Version>
{
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
@@ -91,15 +91,15 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
// implements col-major += alpha * op(triangular) * op(general)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor>
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
};
@@ -220,10 +220,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor>
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
@@ -378,8 +378,8 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);


@@ -0,0 +1,305 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Triangular matrix * matrix product functionality based on ?TRMM.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
namespace internal {
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder>
struct product_triangular_matrix_matrix_trmm :
product_triangular_matrix_matrix<Scalar,Index,Mode,
LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
// try to go to BLAS specialization
#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { \
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
} \
};
EIGEN_MKL_TRMM_SPECIALIZE(double, true)
EIGEN_MKL_TRMM_SPECIALIZE(double, false)
EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
EIGEN_MKL_TRMM_SPECIALIZE(float, true)
EIGEN_MKL_TRMM_SPECIALIZE(float, false)
EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
// implements col-major += alpha * op(triangular) * op(general)
#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper, \
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
}; \
\
static EIGEN_DONT_INLINE void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \
{ \
Index diagSize = (std::min)(_rows,_depth); \
Index rows = IsLower ? _rows : diagSize; \
Index depth = IsLower ? diagSize : _depth; \
Index cols = _cols; \
\
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
/* Non-square case - doesn't fit MKL ?TRMM. Fall back to the default triangular product or call MKL ?GEMM */ \
if (rows != depth) { \
\
int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
\
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
/* Most likely no benefit to call TRMM or GEMM from MKL*/ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
/*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
} else { \
/* Makes sense to call GEMM */ \
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
\
/*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
} \
return; \
} \
char side = 'L', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
MKL_INT m, n, k, lda, ldb, ldc; \
MKLTYPE alpha_; \
\
/* Set alpha_*/ \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
\
/* Set m, n */ \
m = (MKL_INT)diagSize; \
n = (MKL_INT)cols; \
\
/* Set trans */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
\
/* Set b, ldb */ \
Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
MatrixX##EIGPREFIX b_tmp; \
\
if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
\
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs a_tmp; \
\
if ((conjA!=0) || (SetDiag==0)) { \
if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
if (IsZeroDiag) \
a_tmp.diagonal().setZero(); \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else { \
a = _lhs; \
lda = lhsStride; \
} \
/*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \
/* call ?trmm*/ \
MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
res_tmp=res_tmp+b_tmp; \
} \
};
EIGEN_MKL_TRMM_L(double, double, d, d)
EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMM_L(float, float, f, s)
EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
// implements col-major += alpha * op(general) * op(triangular)
#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper, \
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
}; \
\
static EIGEN_DONT_INLINE void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \
{ \
Index diagSize = (std::min)(_cols,_depth); \
Index rows = _rows; \
Index depth = IsLower ? _depth : diagSize; \
Index cols = IsLower ? diagSize : _cols; \
\
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
/* Non-square case - doesn't fit MKL ?TRMM. Fall back to the default triangular product or call MKL ?GEMM */ \
if (cols != depth) { \
\
int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
\
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
/* Most likely no benefit to call TRMM or GEMM from MKL*/ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
/*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
} else { \
/* Makes sense to call GEMM */ \
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, blocking); \
\
/*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
} \
return; \
} \
char side = 'R', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
MKL_INT m, n, k, lda, ldb, ldc; \
MKLTYPE alpha_; \
\
/* Set alpha_*/ \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
\
/* Set m, n */ \
m = (MKL_INT)rows; \
n = (MKL_INT)diagSize; \
\
/* Set trans */ \
transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\
/* Set b, ldb */ \
Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixX##EIGPREFIX b_tmp; \
\
if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
\
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs a_tmp; \
\
if ((conjA!=0) || (SetDiag==0)) { \
if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
if (IsZeroDiag) \
a_tmp.diagonal().setZero(); \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else { \
a = _rhs; \
lda = rhsStride; \
} \
/*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \
/* call ?trmm*/ \
MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
res_tmp=res_tmp+b_tmp; \
} \
};
EIGEN_MKL_TRMM_R(double, double, d, d)
EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMM_R(float, float, f, s)
EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
} // end namespace internal
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
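
For trapezoidal (non-square) triangular factors, the macros above pick one of three paths: a square factor goes straight to ?TRMM; with a single MKL thread and a rectangular excess under half the diagonal block the built-in kernel is kept; otherwise the triangle is densified and a single GEMM is paid for. A sketch of just that decision rule, as a hypothetical helper with the threshold taken from the code above:

#include <algorithm>
#include <cstdio>

const char* trmm_dispatch_sketch(long rows, long depth, int nthreads)
{
  long diagSize = std::min(rows, depth);
  if (rows == depth)
    return "TRMM";                        // square: fits ?trmm directly
  double excess = (std::max(rows, depth) - diagSize) / double(diagSize);
  if (nthreads == 1 && excess < 0.5)
    return "built-in triangular kernel";  // too little work to densify
  return "densify triangle + GEMM";
}

int main()
{
  std::printf("%s\n", trmm_dispatch_sketch(100, 100, 1)); // TRMM
  std::printf("%s\n", trmm_dispatch_sketch(120, 100, 1)); // built-in
  std::printf("%s\n", trmm_dispatch_sketch(300, 100, 1)); // GEMM
  return 0;
}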


@@ -27,21 +27,25 @@
namespace internal {
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
struct product_triangular_matrix_vector;
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized>
struct triangular_matrix_vector_product;
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor>
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static EIGEN_DONT_INLINE void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
Index rows = IsLower ? _rows : (std::min)(_rows,_cols);
Index cols = IsLower ? (std::min)(_rows,_cols) : _cols;
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
@@ -54,45 +58,57 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;
ResMap res(_res,rows);
for (Index pi=0; pi<cols; pi+=PanelWidth)
for (Index pi=0; pi<size; pi+=PanelWidth)
{
Index actualPanelWidth = (std::min)(PanelWidth, cols-pi);
Index actualPanelWidth = (std::min)(PanelWidth, size-pi);
for (Index k=0; k<actualPanelWidth; ++k)
{
Index i = pi + k;
Index s = IsLower ? (HasUnitDiag ? i+1 : i ) : pi;
Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi;
Index r = IsLower ? actualPanelWidth-k : k+1;
if ((!HasUnitDiag) || (--r)>0)
if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);
if (HasUnitDiag)
res.coeffRef(i) += alpha * cjRhs.coeff(i);
}
Index r = IsLower ? cols - pi - actualPanelWidth : pi;
Index r = IsLower ? rows - pi - actualPanelWidth : pi;
if (r>0)
{
Index s = IsLower ? pi+actualPanelWidth : 0;
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
r, actualPanelWidth,
&lhs.coeffRef(s,pi), lhsStride,
&rhs.coeffRef(pi), rhsIncr,
&res.coeffRef(s), resIncr, alpha);
}
}
if((!IsLower) && cols>size)
{
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
rows, cols-size,
&lhs.coeffRef(0,size), lhsStride,
&rhs.coeffRef(size), rhsIncr,
_res, resIncr, alpha);
}
}
};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor>
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows,_cols);
Index rows = IsLower ? _rows : diagSize;
Index cols = IsLower ? diagSize : _cols;
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
@@ -105,15 +121,15 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;
ResMap res(_res,rows,InnerStride<>(resIncr));
for (Index pi=0; pi<cols; pi+=PanelWidth)
for (Index pi=0; pi<diagSize; pi+=PanelWidth)
{
Index actualPanelWidth = (std::min)(PanelWidth, cols-pi);
Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);
for (Index k=0; k<actualPanelWidth; ++k)
{
Index i = pi + k;
Index s = IsLower ? pi : (HasUnitDiag ? i+1 : i);
Index s = IsLower ? pi : ((HasUnitDiag||HasZeroDiag) ? i+1 : i);
Index r = IsLower ? k+1 : actualPanelWidth-k;
if ((!HasUnitDiag) || (--r)>0)
if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum();
if (HasUnitDiag)
res.coeffRef(i) += alpha * cjRhs.coeff(i);
@@ -122,13 +138,21 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
if (r>0)
{
Index s = IsLower ? 0 : pi + actualPanelWidth;
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
actualPanelWidth, r,
&lhs.coeffRef(pi,s), lhsStride,
&rhs.coeffRef(s), rhsIncr,
&res.coeffRef(pi), resIncr, alpha);
}
}
if(IsLower && rows>diagSize)
{
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
rows-diagSize, cols,
&lhs.coeffRef(diagSize,0), lhsStride,
&rhs.coeffRef(0), rhsIncr,
&res.coeffRef(diagSize), resIncr, alpha);
}
}
};
@@ -180,7 +204,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
typedef TriangularProduct<(Mode & UnitDiag) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
Transpose<Dest> dstT(dst);
internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
@@ -208,8 +232,8 @@ template<> struct trmv_selector<ColMajor>
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
@@ -247,7 +271,7 @@ template<> struct trmv_selector<ColMajor>
MappedDest(actualDestPtr, dest.size()) = dest;
}
internal::product_triangular_matrix_vector
internal::triangular_matrix_vector_product
<Index,Mode,
LhsScalar, LhsBlasTraits::NeedToConjugate,
RhsScalar, RhsBlasTraits::NeedToConjugate,
@@ -307,7 +331,7 @@ template<> struct trmv_selector<RowMajor>
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
internal::product_triangular_matrix_vector
internal::triangular_matrix_vector_product
<Index,Mode,
LhsScalar, LhsBlasTraits::NeedToConjugate,
RhsScalar, RhsBlasTraits::NeedToConjugate,


@@ -0,0 +1,245 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Triangular matrix-vector product functionality based on ?TRMV.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
namespace internal {
/**********************************************************************
* This file implements triangular matrix-vector multiplication using BLAS
**********************************************************************/
// trmv/hemv specialization
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
struct triangular_matrix_vector_product_trmv :
triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
} \
}; \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
} \
};
EIGEN_MKL_TRMV_SPECIALIZE(double)
EIGEN_MKL_TRMV_SPECIALIZE(float)
EIGEN_MKL_TRMV_SPECIALIZE(dcomplex)
EIGEN_MKL_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector
#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (ConjLhs || IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
Index rows = IsLower ? _rows : size; \
Index cols = IsLower ? size : _cols; \
\
typedef VectorX##EIGPREFIX VectorRhs; \
EIGTYPE *x, *y;\
\
/* Set x*/ \
Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
VectorRhs x_tmp; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
\
/* Square part handling */\
\
char trans, uplo, diag; \
MKL_INT m, n, lda, incx, incy; \
EIGTYPE const *a; \
MKLTYPE alpha_, beta_; \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
\
/* Set m, n */ \
n = (MKL_INT)size; \
lda = lhsStride; \
incx = 1; \
incy = resIncr; \
\
/* Set uplo, trans and diag*/ \
trans = 'N'; \
uplo = IsLower ? 'L' : 'U'; \
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
\
/* Add alpha * op(a_tr) * rhs into res */ \
MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
/* Non-square case: the remainder doesn't fit MKL ?TRMV, handle it with ?GEMV */ \
if (size<(std::max)(rows,cols)) { \
typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size; \
m = rows-size; \
n = size; \
} \
if (size<cols) { \
x += size; \
y = _res; \
a = _lhs + size*lda; \
m = size; \
n = cols-size; \
} \
MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
} \
} \
};
EIGEN_MKL_TRMV_CM(double, double, d, d)
EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMV_CM(float, float, f, s)
EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c)
// implements row-major: res += alpha * op(triangular) * vector
#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
Index rows = IsLower ? _rows : size; \
Index cols = IsLower ? size : _cols; \
\
typedef VectorX##EIGPREFIX VectorRhs; \
EIGTYPE *x, *y;\
\
/* Set x*/ \
Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
VectorRhs x_tmp; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
\
/* Square part handling */\
\
char trans, uplo, diag; \
MKL_INT m, n, lda, incx, incy; \
EIGTYPE const *a; \
MKLTYPE alpha_, beta_; \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
\
/* Set m, n */ \
n = (MKL_INT)size; \
lda = lhsStride; \
incx = 1; \
incy = resIncr; \
\
/* Set uplo, trans and diag*/ \
trans = ConjLhs ? 'C' : 'T'; \
uplo = IsLower ? 'U' : 'L'; \
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
\
/* Add alpha * op(a_tr) * rhs into res */ \
MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
/* Non-square case: the remainder doesn't fit MKL ?TRMV, handle it with ?GEMV */ \
if (size<(std::max)(rows,cols)) { \
typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size*lda; \
m = rows-size; \
n = size; \
} \
if (size<cols) { \
x += size; \
y = _res; \
a = _lhs + size; \
m = size; \
n = cols-size; \
} \
MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
} \
} \
};
EIGEN_MKL_TRMV_RM(double, double, d, d)
EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMV_RM(float, float, f, s)
EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c)
} // end namespace internal
#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H

View File

@@ -75,12 +75,20 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB + sizeW;
Scalar* blockW = allocatedBlockB;
conj_if<Conjugate> conj;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
// the goal here is to subdivide the Rhs panels such that we keep some cache
// coherence when accessing the rhs elements
std::ptrdiff_t l1, l2;
manage_caching_sizes(GetAction, &l1, &l2);
Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
for(Index k2=IsLower ? 0 : size;
IsLower ? k2<size : k2>0;
IsLower ? k2+=kc : k2-=kc)
@@ -92,16 +100,18 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
// A11 (the triangular part) and A21 the remaining rectangular part.
// Then the high level algorithm is:
// - B = R1 => general block copy (done during the next step)
// - R1 = L1^-1 B => tricky part
// - R1 = A11^-1 B => tricky part
// - update B from the new R1 => actually this has to be performed continuously during the above step
// - R2 = L2 * B => GEPP
// - R2 -= A21 * B => GEPP
// The tricky part: compute R1 = L1^-1 B while updating B from R1
// The idea is to split L1 into multiple small vertical panels.
// Each panel can be split into a small triangular part A1 which is processed without optimization,
// and the remaining small part A2 which is processed using gebp with appropriate block strides
// The tricky part: compute R1 = A11^-1 B while updating B from R1
// The idea is to split A11 into multiple small vertical panels.
// Each panel can be split into a small triangular part T1k which is processed without optimization,
// and the remaining small part T2k which is processed using gebp with appropriate block strides
for(Index j2=0; j2<cols; j2+=subcols)
{
// for each small vertical panels of lhs
Index actual_cols = (std::min)(cols-j2,subcols);
// for each small vertical panels [T1k^T, T2k^T]^T of lhs
for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
@@ -114,11 +124,11 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index rs = actualPanelWidth - k - 1; // remaining size
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
for (Index j=0; j<cols; ++j)
for (Index j=j2; j<j2+actual_cols; ++j)
{
if (TriStorageOrder==RowMajor)
{
Scalar b = 0;
Scalar b(0);
const Scalar* l = &tri(i,s);
Scalar* r = &other(s,j);
for (Index i3=0; i3<k; ++i3)
@@ -143,7 +153,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index blockBOffset = IsLower ? k1 : lengthTarget;
// update the respective rows of B from other
pack_rhs(blockB, _other+startBlock, otherStride, actualPanelWidth, cols, actual_kc, blockBOffset);
pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset);
// GEBP
if (lengthTarget>0)
@@ -152,13 +162,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, Scalar(-1),
actualPanelWidth, actual_kc, 0, blockBOffset);
gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
}
}
}
// R2 = A2 * B => GEPP
// R2 -= A21 * B => GEPP
{
Index start = IsLower ? k2+kc : 0;
Index end = IsLower ? size : k2-kc;

View File

@@ -0,0 +1,151 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Triangular matrix solve functionality based on ?TRSM.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
namespace internal {
// implements LeftSide op(triangular)^-1 * general
#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
static EIGEN_DONT_INLINE void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride) \
{ \
MKL_INT m = size, n = otherSize, lda, ldb; \
char side = 'L', uplo, diag='N', transa; \
/* Set alpha */ \
MKLTYPE alpha; \
EIGTYPE myone(1); \
assign_scalar_eig2mkl(alpha, myone); \
ldb = otherStride;\
\
const EIGTYPE *a; \
/* Set trans */ \
transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
MatrixTri a_tmp; \
\
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else { \
a = _tri; \
lda = triStride; \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
} \
};
EIGEN_MKL_TRSM_L(double, double, d)
EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z)
EIGEN_MKL_TRSM_L(float, float, s)
EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c)
// implements RightSide general * op(triangular)^-1
#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
static EIGEN_DONT_INLINE void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride) \
{ \
MKL_INT m = otherSize, n = size, lda, ldb; \
char side = 'R', uplo, diag='N', transa; \
/* Set alpha */ \
MKLTYPE alpha; \
EIGTYPE myone(1); \
assign_scalar_eig2mkl(alpha, myone); \
ldb = otherStride;\
\
const EIGTYPE *a; \
/* Set trans */ \
transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
MatrixTri a_tmp; \
\
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else { \
a = _tri; \
lda = triStride; \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
} \
};
EIGEN_MKL_TRSM_R(double, double, d)
EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z)
EIGEN_MKL_TRSM_R(float, float, s)
EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c)
} // end namespace internal
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H

View File

@@ -47,7 +47,7 @@ template<
int ResStorageOrder>
struct general_matrix_matrix_product;
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version=Specialized>
struct general_matrix_vector_product;
@@ -56,11 +56,15 @@ template<bool Conjugate> struct conj_if;
template<> struct conj_if<true> {
template<typename T>
inline T operator()(const T& x) { return conj(x); }
template<typename T>
inline T pconj(const T& x) { return internal::pconj(x); }
};
template<> struct conj_if<false> {
template<typename T>
inline const T& operator()(const T& x) { return x; }
template<typename T>
inline const T& pconj(const T& x) { return x; }
};
template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
@@ -118,11 +122,11 @@ template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::comp
};
template<typename From,typename To> struct get_factor {
EIGEN_STRONG_INLINE static To run(const From& x) { return x; }
static EIGEN_STRONG_INLINE To run(const From& x) { return x; }
};
template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
EIGEN_STRONG_INLINE static typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
};
// Lightweight helper class to access matrix coefficients.
@@ -175,7 +179,7 @@ template<typename XprType> struct blas_traits
ExtractType,
typename _ExtractType::PlainObject
>::type DirectLinearAccessType;
static inline const ExtractType extract(const XprType& x) { return x; }
static inline ExtractType extract(const XprType& x) { return x; }
static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
};
@@ -192,7 +196,7 @@ struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
IsComplex = NumTraits<Scalar>::IsComplex,
NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
};
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
};
@@ -204,7 +208,7 @@ struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> >
typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); }
};
@@ -217,7 +221,7 @@ struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return - Base::extractScalarFactor(x.nestedExpression()); }
};
@@ -239,7 +243,7 @@ struct blas_traits<Transpose<NestedXpr> >
enum {
IsTransposed = Base::IsTransposed ? 0 : 1
};
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
};
@@ -252,7 +256,7 @@ template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectA
struct extract_data_selector {
static const typename T::Scalar* run(const T& m)
{
return const_cast<typename T::Scalar*>(&blas_traits<T>::extract(m).coeffRef(0,0)); // FIXME this should be .data()
return blas_traits<T>::extract(m).data();
}
};

View File

@@ -188,7 +188,9 @@ enum {
/** View matrix as an upper triangular matrix with zeros on the diagonal. */
StrictlyUpper=ZeroDiag|Upper,
/** Used in BandMatrix and SelfAdjointView to indicate that the matrix is self-adjoint. */
SelfAdjoint=0x10
SelfAdjoint=0x10,
/** Used to support symmetric, non-selfadjoint, complex matrices. */
Symmetric=0x20
};
/** \ingroup enums
@@ -200,8 +202,6 @@ enum {
Aligned=1
};
enum { ConditionalJumpCost = 5 };
/** \ingroup enums
* Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
// FIXME after the corner() API change, this was not needed anymore, except by AlignedBox
@@ -223,8 +223,6 @@ enum DirectionType {
BothDirections
};
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct };
/** \internal \ingroup enums
* Enum to specify how to traverse the entries of a matrix. */
enum {
@@ -257,6 +255,13 @@ enum {
CompleteUnrolling
};
/** \internal \ingroup enums
* Enum to specify whether to use the default (built-in) implementation or the specialization. */
enum {
Specialized,
BuiltIn
};
/** \ingroup enums
* Enum containing possible values for the \p _Options template parameter of
* Matrix, Array and BandMatrix. */
@@ -375,7 +380,7 @@ enum QRPreconditioners {
#error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h
#endif
/** \ingroups enums
/** \ingroup enums
* Enum for reporting the status of a computation. */
enum ComputationInfo {
/** Computation was successful. */
@@ -383,7 +388,10 @@ enum ComputationInfo {
/** The provided data did not satisfy the prerequisites. */
NumericalIssue = 1,
/** Iterative procedure did not converge. */
NoConvergence = 2
NoConvergence = 2,
/** The inputs are invalid, or the algorithm has been improperly called.
* When assertions are enabled, such errors trigger an assert. */
InvalidInput = 3
};
/** \ingroup enums

View File

@@ -21,15 +21,13 @@
#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when class methods are defined 'inline', i.e., inside the class body
// 2536 - type qualifiers are meaningless here
// ICC 12 generates this warning when a function return type is const qualified, even if that type is a template-parameter-dependent
// typedef that may be a reference type.
// 279 - controlling expression is constant
// ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning push
#endif
#pragma warning disable 2196 2536 279
#pragma warning disable 2196 279
#elif defined __clang__
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
// this is really a stupid warning as it warns on compile-time expressions involving enums

View File

@@ -133,6 +133,7 @@ template<typename ExpressionType> class WithFormat;
template<typename MatrixType> struct CommaInitializer;
template<typename Derived> class ReturnByValue;
template<typename ExpressionType> class ArrayWrapper;
template<typename ExpressionType> class MatrixWrapper;
namespace internal {
template<typename DecompositionType, typename Rhs> struct solve_retval_base;
@@ -282,6 +283,8 @@ template<typename MatrixType,int Direction> class Homogeneous;
// MatrixFunctions module
template<typename Derived> struct MatrixExponentialReturnValue;
template<typename Derived> class MatrixFunctionReturnValue;
template<typename Derived> class MatrixSquareRootReturnValue;
template<typename Derived> class MatrixLogarithmReturnValue;
namespace internal {
template <typename Scalar>

View File

@@ -0,0 +1,109 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Include file with common MKL declarations
********************************************************************************
*/
#ifndef EIGEN_MKL_SUPPORT_H
#define EIGEN_MKL_SUPPORT_H
#ifdef EIGEN_USE_MKL_ALL
#ifndef EIGEN_USE_BLAS
#define EIGEN_USE_BLAS
#endif
#ifndef EIGEN_USE_LAPACKE
#define EIGEN_USE_LAPACKE
#endif
#ifndef EIGEN_USE_MKL_VML
#define EIGEN_USE_MKL_VML
#endif
#endif
#ifdef EIGEN_USE_LAPACKE_STRICT
#define EIGEN_USE_LAPACKE
#endif
#if defined(EIGEN_USE_BLAS) || defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML)
#define EIGEN_USE_MKL
#endif
#if defined EIGEN_USE_MKL
#include <mkl.h>
#include <mkl_lapacke.h>
#define EIGEN_MKL_VML_THRESHOLD 128
namespace Eigen {
typedef std::complex<double> dcomplex;
typedef std::complex<float> scomplex;
namespace internal {
template<typename MKLType, typename EigenType>
static inline void assign_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
mklScalar=eigenScalar;
}
template<typename MKLType, typename EigenType>
static inline void assign_conj_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
mklScalar=eigenScalar;
}
template <>
inline void assign_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=eigenScalar.imag();
}
template <>
inline void assign_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=eigenScalar.imag();
}
template <>
inline void assign_conj_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=-eigenScalar.imag();
}
template <>
inline void assign_conj_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=-eigenScalar.imag();
}
} // end namespace internal
} // end namespace Eigen
#endif
#endif // EIGEN_MKL_SUPPORT_H

View File

@@ -28,7 +28,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 0
#define EIGEN_MINOR_VERSION 4
#define EIGEN_MINOR_VERSION 92
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -234,12 +234,16 @@
#define EIGEN_ONLY_USED_FOR_DEBUG(x)
#endif
#if (defined __GNUC__)
#define EIGEN_DEPRECATED __attribute__((deprecated))
#elif (defined _MSC_VER)
#define EIGEN_DEPRECATED __declspec(deprecated)
#ifndef EIGEN_NO_DEPRECATED_WARNING
#if (defined __GNUC__)
#define EIGEN_DEPRECATED __attribute__((deprecated))
#elif (defined _MSC_VER)
#define EIGEN_DEPRECATED __declspec(deprecated)
#else
#define EIGEN_DEPRECATED
#endif
#else
#define EIGEN_DEPRECATED
#define EIGEN_DEPRECATED
#endif
#if (defined __GNUC__)
@@ -251,7 +255,7 @@
// Suppresses 'unused variable' warnings.
#define EIGEN_UNUSED_VARIABLE(var) (void)var;
#if (defined __GNUC__)
#if !defined(EIGEN_ASM_COMMENT) && (defined __GNUC__)
#define EIGEN_ASM_COMMENT(X) asm("#" X)
#else
#define EIGEN_ASM_COMMENT(X)

View File

@@ -457,7 +457,7 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *
* There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
*/
template<typename Scalar, typename Index>
inline static Index first_aligned(const Scalar* array, Index size)
static inline Index first_aligned(const Scalar* array, Index size)
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size,
@@ -483,6 +483,27 @@ inline static Index first_aligned(const Scalar* array, Index size)
}
}
// std::copy is much slower than memcpy, so let's introduce a smart_copy which
// uses memcpy on trivial types, i.e., on types that do not require an initialization ctor.
template<typename T, bool UseMemcpy> struct smart_copy_helper;
template<typename T> void smart_copy(const T* start, const T* end, T* target)
{
smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}
template<typename T> struct smart_copy_helper<T,true> {
static inline void run(const T* start, const T* end, T* target)
{ memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
};
template<typename T> struct smart_copy_helper<T,false> {
static inline void run(const T* start, const T* end, T* target)
{ std::copy(start, end, target); }
};
} // end namespace internal
/*****************************************************************************
@@ -538,7 +559,7 @@ template<typename T> class aligned_stack_memory_handler
* if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
* (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
* The allocated buffer is automatically deleted when exiting the scope of this declaration.
* If BUFFER is non nul, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
* If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
* Here is an example:
* \code
* {
@@ -619,7 +640,7 @@ template<typename T> class aligned_stack_memory_handler
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0))
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0)))
/****************************************************************************/
@@ -667,24 +688,24 @@ public:
return &value;
}
aligned_allocator() throw()
aligned_allocator()
{
}
aligned_allocator( const aligned_allocator& ) throw()
aligned_allocator( const aligned_allocator& )
{
}
template<class U>
aligned_allocator( const aligned_allocator<U>& ) throw()
aligned_allocator( const aligned_allocator<U>& )
{
}
~aligned_allocator() throw()
~aligned_allocator()
{
}
size_type max_size() const throw()
size_type max_size() const
{
return (std::numeric_limits<size_type>::max)();
}
@@ -720,19 +741,21 @@ public:
//---------- Cache sizes ----------
#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
# if defined(__PIC__) && defined(__i386__)
// Case for x86 with PIC
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
# else
// Case for x86_64 or x86 w/o PIC
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
# endif
#elif defined(_MSC_VER)
# if (_MSC_VER > 1500)
# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#if !defined(EIGEN_NO_CPUID)
# if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
# if defined(__PIC__) && defined(__i386__)
// Case for x86 with PIC
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
# else
// Case for x86_64 or x86 w/o PIC
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
# endif
# elif defined(_MSC_VER)
# if (_MSC_VER > 1500)
# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
# endif
# endif
#endif
@@ -742,7 +765,7 @@ namespace internal {
inline bool cpuid_is_vendor(int abcd[4], const char* vendor)
{
return abcd[1]==((int*)(vendor))[0] && abcd[3]==((int*)(vendor))[1] && abcd[2]==((int*)(vendor))[2];
return abcd[1]==(reinterpret_cast<const int*>(vendor))[0] && abcd[3]==(reinterpret_cast<const int*>(vendor))[1] && abcd[2]==(reinterpret_cast<const int*>(vendor))[2];
}
inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)

Some files were not shown because too many files have changed in this diff.