Compare commits

...

235 Commits

Author SHA1 Message Date
Gael Guennebaud
dd3685cc6a Bump to 3.3.1 2016-12-06 11:43:58 +01:00
Gael Guennebaud
487a6e6515 Explain how to choose your favorite Eigen version
(grafted from 0c4d05b009)
2016-12-06 11:34:06 +01:00
Silvio Traversaro
75f0b8aae3 Added relocatable cmake support also for CMake before 3.0 and after 2.8.8
(grafted from e049a2a72a)
2016-12-06 10:37:34 +01:00
Silvio Traversaro
0164f4c682 Make CMake config file relocatable
(grafted from 18481b518f)
2016-12-05 10:39:52 +01:00
Gael Guennebaud
bbff608a42 Merged in angelos_m/eigen/3.3 (pull request PR-264)
add explicit template to numext::abs2 and fix signed/unsigned warning
2016-12-05 21:56:01 +00:00
Gael Guennebaud
ea56d2ff2c Fix memory leak in Ref<Sparse>
(grafted from a6b971e291)
2016-12-05 16:59:30 +01:00
Gael Guennebaud
a4c8701e9a bug #1356: fix calls to evaluator::coeffRef(0,0) to get the address of the destination
by adding a dstDataPtr() member to the kernel. This fixes undefined behavior if dst is empty (nullptr).
(grafted from 0db6d5b3f4)
2016-12-05 15:08:09 +01:00
Angelos Mantzaflaris
0a08d4c60b use numext::abs 2016-12-02 11:48:06 +01:00
Angelos Mantzaflaris
4086187e49 1. Add explicit template to abs2 (resolves deduction for some arithmetic types)
2. Avoid signed-unsigned conversion in comparison (warning in case Scalar is unsigned)
2016-12-02 11:39:18 +01:00
Gael Guennebaud
c3597106ab Merged in angelos_m/eigen/3.3 (pull request PR-263)
fix two warnings (unused typedef, unused variable) and a typo
2016-12-02 09:02:39 +00:00
Gael Guennebaud
aed1d6597f Clean up SparseCore module regarding ReverseInnerIterator
(grafted from 27873008d4)
2016-12-01 21:55:10 +01:00
Angelos Mantzaflaris
b6f04a2dd4 typo UIntPtr 2016-12-01 21:25:58 +01:00
Angelos Mantzaflaris
a9aa3bcf50 fix two warnings (unused typedef, unused variable) and a typo 2016-12-01 21:23:43 +01:00
Gael Guennebaud
32b8da66e3 fix member order
(grafted from 181138a1cb)
2016-12-01 17:06:20 +01:00
Gael Guennebaud
eb94179ea3 Merged in sergiu/eigen/cmake-imported-target (pull request PR-257)
CMake imported target (take #2)
2016-12-01 15:13:48 +00:00
Gael Guennebaud
52a7386aef Fix misleading-indentation warnings.
(grafted from 037b46762d)
2016-12-01 16:05:42 +01:00
Gael Guennebaud
8cada1d894 Fix selection of product implementation for dynamic-size matrices with fixed max size.
(grafted from 8df272af88)
2016-11-30 22:21:33 +01:00
Gael Guennebaud
6e4a664c42 Fix a performance regression in (mat*mat)*vec for which mat*mat was evaluated multiple times.
(grafted from c927af60ed)
2016-11-30 17:59:13 +01:00
Gael Guennebaud
1cd1a96d56 bug #1351: fix compilation of random with old compilers
(grafted from ab4ef5e66e)
2016-11-30 17:37:53 +01:00
Sergiu Deitsch
86ab00cdcf cmake: remove architecture dependency from Eigen3ConfigVersion.cmake
Also, install Eigen3*.cmake under $prefix/share/eigen3/cmake by default.
2016-11-30 15:46:46 +01:00
Sergiu Deitsch
65f09be8d2 doc: mention the NO_MODULE option and target availability 2016-11-30 15:41:38 +01:00
Gael Guennebaud
400d756b82 bug #1348: Document EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES,
and reflect in the doc that EIGEN_DONT_ALIGN* are deprecated.
(grafted from 21d0286d81)
2016-11-23 22:15:03 +01:00
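For reference, a minimal usage sketch of the two macros documented in the commit above. They must be defined before the first Eigen header is included; the values shown are purely illustrative, not defaults taken from that changeset.
  // Illustrative only: cap heap/runtime alignment at 32 bytes and disable
  // static (stack) alignment before pulling in Eigen.
  #define EIGEN_MAX_ALIGN_BYTES 32
  #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
  #include <Eigen/Dense>

  int main() {
    Eigen::Matrix4f m = Eigen::Matrix4f::Identity();
    return static_cast<int>(m(0, 0));
  }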
Gael Guennebaud
9d31798a84 update cdash project for 3.3 2016-11-23 14:13:08 +01:00
Gael Guennebaud
0a7de0b273 Fix compilation issue with MSVC:
MSVC always messes up shadowed template arguments, for instance in:
  struct B { typedef float T; };
  template<typename T> struct A : B {
    T g;
  };
The type of A<double>::g will be float and not double.
(grafted from a91de27e98)
2016-11-23 12:24:48 +01:00
Sergiu Deitsch
a287140f72 cmake: added Eigen3::Eigen imported target 2016-11-22 12:25:06 +01:00
Gael Guennebaud
4d89ec8a00 Fix regression in assignment of sparse block to sparse block.
(grafted from 6a84246a6a)
2016-11-21 21:46:42 +01:00
Chun Wang
441760f239 Workaround for error in VS2012 with /clr
(grafted from 0d0948c3b9)
2016-11-17 17:54:27 -05:00
Gael Guennebaud
664162fb8a Fix compilation issue in mat = permutation (regression introduced in 8193ffb3d3)
(grafted from 465ede0f20)
2016-11-20 09:41:37 +01:00
Gael Guennebaud
aa3c761002 bug #1343: fix compilation regression in mat+=selfadjoint_view.
Generic EigenBase2EigenBase assignment was incomplete.
(grafted from 8193ffb3d3)
2016-11-18 10:17:34 +01:00
Gael Guennebaud
94f2cfc9c7 bug #1343: fix compilation regression in array = matrix_product
(grafted from cebff7e3a2)
2016-11-18 10:09:33 +01:00
Konstantinos Margaritis
4a13d79df6 replace sizeof(Packet) with PacketSize else it breaks for ZVector.Packet4f
(grafted from a1d5c503fa)
2016-11-17 13:27:45 -05:00
Konstantinos Margaritis
463176cc44 implement float/std::complex<float> for ZVector as well, minor fixes to ZVector
(grafted from 672aa97d4d)
2016-11-17 13:27:33 -05:00
Gael Guennebaud
5aab97fba6 Optimize sparse<bool> && sparse<bool> to use the same path as for coeff-wise products.
(grafted from 0ee92aa38e)
2016-11-14 18:47:41 +01:00
Gael Guennebaud
89abc6806d bug #426: move operator && and || to MatrixBase and SparseMatrixBase.
(grafted from 2e334f5da0)
2016-11-14 18:47:02 +01:00
Niels Ole Salscheider
baf793ebaa Make sure not to call numext::maxi on expression templates
(grafted from 51fef87408)
2016-11-12 12:20:57 +01:00
Gael Guennebaud
b4ddafcfac Fix regression in SparseMatrix::ReverseInnerIterator
(grafted from eedb87f4ba)
2016-11-14 14:05:53 +01:00
Gael Guennebaud
1079967710 Added tag 3.3.0 for changeset eeac81b8c0 2016-11-10 13:57:29 +01:00
Gael Guennebaud
eeac81b8c0 bump to 3.3.0 2016-11-10 13:55:14 +01:00
Gael Guennebaud
e80bc2ddb0 Fix printing of sparse expressions 2016-11-10 10:35:32 +01:00
Benoit Steiner
db3903498d Merged in benoitsteiner/opencl (pull request PR-246)
Improved support for OpenCL
2016-11-08 22:28:44 +00:00
Benoit Steiner
dcc14bee64 Fixed the formatting of the code 2016-11-08 14:24:46 -08:00
Benoit Steiner
b88c1117d4 Fixed the indentation of the cmake file 2016-11-08 14:22:36 -08:00
Luke Iwanski
912cb3d660 #if EIGEN_EXCEPTION -> #ifdef EIGEN_EXCEPTIONS. 2016-11-08 22:01:14 +00:00
Luke Iwanski
1b345b0895 Fix for SYCL queue initialisation. 2016-11-08 21:56:31 +00:00
Luke Iwanski
1b95717358 Use try/catch only when exceptions are enabled. 2016-11-08 21:08:53 +00:00
Mehdi Goli
d57430dd73 Converting all sycl buffers to uninitialised device-only buffers; adding memcpyHostToDevice and memcpyDeviceToHost on syclDevice; modifying all examples to obey the new rules; moving sycl queue creation to the device, following Benoit's suggestion; removing the sycl-specific condition for returning m_result in TensorReduction.h, also per Benoit's suggestion. 2016-11-08 17:08:02 +00:00
Gael Guennebaud
73985ead27 Extend unit test to check sparse solvers with a SparseVector as the rhs and result. 2016-11-06 20:29:57 +01:00
Gael Guennebaud
436a111792 Generalize Cholmod support to handle any sparse type as the rhs and result of the solve method 2016-11-06 20:29:23 +01:00
Gael Guennebaud
afc55b1885 Generalize IterativeSolverBase::solve to handle any sparse type as the result (instead of SparseMatrix only) 2016-11-06 20:28:18 +01:00
Gael Guennebaud
a5c2d8a3cc Generalize solve_sparse_through_dense_panels to handle SparseVector. 2016-11-06 15:20:58 +01:00
Gael Guennebaud
f8bfe10613 Add missing friend declaration 2016-11-06 15:20:30 +01:00
Gael Guennebaud
fc7180cda8 Add a default ctor to evaluator<SparseVector>.
Needed for evaluator<Solve>.
2016-11-06 15:20:00 +01:00
Gael Guennebaud
4d226ab5b5 Enable swapping between SparseMatrix and SparseVector 2016-11-06 15:15:03 +01:00
Benoit Steiner
ad086b03e4 Removed unnecessary statement 2016-11-05 12:43:27 -07:00
Benoit Steiner
dad177be01 Added missing includes 2016-11-05 10:04:42 -07:00
Gael Guennebaud
55b4fd1d40 Extend mpreal unit test to check LLT with complexes. 2016-11-05 11:28:53 +01:00
Gael Guennebaud
a354c3ca59 Fix compilation of LLT with complex<mpreal>. 2016-11-05 11:28:29 +01:00
Benoit Steiner
d46a36cc84 Merged eigen/eigen into default 2016-11-04 18:22:55 -07:00
Mehdi Goli
0ebe3808ca Removed the sycl include from Eigen/Core and moved it to Unsupported/Eigen/CXX11/Tensor; added TensorReduction for sycl (full reduction and partial reduction); added TensorReduction test case for sycl (full reduction and partial reduction); fixed the tile size on TensorSyclRun.h based on the device max work group size; 2016-11-04 18:18:19 +00:00
Gael Guennebaud
47d1b4a609 Added tag 3.3-rc2 for changeset ba05572dcb 2016-11-04 09:09:18 +01:00
Gael Guennebaud
ba05572dcb bump to 3.3-rc2 2016-11-04 09:09:06 +01:00
Benoit Steiner
5c3995769c Improved AVX512 configuration 2016-11-03 04:50:28 -07:00
Benoit Steiner
fbe672d599 Reenable the generation of dynamic blas libraries. 2016-11-03 04:08:43 -07:00
Benoit Steiner
ca0ba0d9a4 Improved AVX512 support 2016-11-03 04:00:49 -07:00
Benoit Steiner
c80587c92b Merged eigen/eigen into default 2016-11-03 03:55:11 -07:00
Gael Guennebaud
3f1d0cdc22 bug #1337: improve doc of homogeneous() and hnormalized() 2016-11-03 11:03:08 +01:00
Gael Guennebaud
78e93ac1ad bug #1330: Cholmod supports double precision only, so let's trigger a static assertion if the scalar type does not match this requirement. 2016-11-03 10:21:59 +01:00
Benoit Steiner
3e37166d0b Merged in benoitsteiner/opencl (pull request PR-244)
Disable vectorization on device only when compiling for sycl
2016-11-02 22:01:03 +00:00
Benoit Steiner
0585b2965d Disable vectorization on device only when compiling for sycl 2016-11-02 11:44:27 -07:00
Benoit Steiner
e6e77ed08b Don't call lgamma_r when compiling for an Apple device, since the function isn't available on MacOS 2016-11-02 09:55:39 -07:00
Benoit Steiner
b238f387b4 Pulled latest updates from trunk 2016-11-02 08:53:13 -07:00
Benoit Steiner
c8db17301e Special functions require math.h: make sure it is included. 2016-11-02 08:51:52 -07:00
Gael Guennebaud
a07bb428df bug #1004: improve accuracy of LinSpaced for abs(low) >> abs(high). 2016-11-02 11:34:38 +01:00
Gael Guennebaud
598de8b193 Add pinsertfirst function and implement pinsertlast for complex on SSE/AVX. 2016-11-02 10:38:13 +01:00
Benoit Steiner
e44519744e Merged in benoitsteiner/opencl (pull request PR-243)
Fixed the ambiguity in calling make_tuple for sycl backend.
2016-11-02 02:56:58 +00:00
Rasmus Munk Larsen
0a6ae41555 Merged eigen/eigen into default 2016-11-01 15:37:00 -07:00
Rasmus Munk Larsen
b730952414 Don't attempt to use lgamma_r for CUDA devices.
Fix type in lgamma_impl<double>.
2016-11-01 15:34:19 -07:00
Benoit Steiner
7a0e96b80d Gate the code that refers to cuda fp16 primitives more thoroughly 2016-11-01 12:08:09 -07:00
Mehdi Goli
51af6ae971 Fixed the ambiguity in calling make_tuple for sycl backend. 2016-10-31 16:35:51 +00:00
Benoit Steiner
0a9ad6fc72 Worked around Visual Studio compilation errors 2016-10-28 07:54:27 -07:00
Benoit Steiner
d5f88e2357 Sharded the tensor_image_patch test to help it run on low power devices 2016-10-27 21:48:21 -07:00
Benoit Steiner
0b4b0f11e8 Fixed a few more compilation warnings 2016-10-28 04:01:01 +00:00
Benoit Steiner
306daa24a3 Fixed a compilation warning 2016-10-28 03:50:31 +00:00
Benoit Steiner
8471cf1996 Fixed compilation warning 2016-10-28 03:46:08 +00:00
Benoit Steiner
b0c5bfdf78 Added missing template parameters 2016-10-28 03:43:41 +00:00
Rasmus Munk Larsen
2ebb314fa7 Use threadsafe versions of lgamma and lgammaf if possible. 2016-10-27 16:17:12 -07:00
Gael Guennebaud
530f20c21a Workaround MSVC issue. 2016-10-27 21:51:37 +02:00
Gael Guennebaud
c3ce4f9ac0 Merged in enricodetoma/eigen (pull request PR-241)
Always enable /bigobj for tests to avoid a compile error in MSVC 2015
2016-10-27 19:21:28 +00:00
Benoit Steiner
7d64e6752c Pulled latest updates from trunk 2016-10-26 18:48:06 -07:00
Benoit Steiner
0a4c4d40b4 Removed a template parameter for fixed sized tensors 2016-10-26 18:47:37 -07:00
Gael Guennebaud
3ecb343dc3 Fix regression in X = (X*X.transpose())/s with X rectangular by deferring resizing of the destination after the creation of the evaluator of the source expression. 2016-10-26 22:50:41 +02:00
enrico.detoma
6ed571744b Always enable /bigobj for tests to avoid a compile error in MSVC 2015 2016-10-26 22:48:46 +02:00
Gael Guennebaud
97feea9d39 add a generic EIGEN_HAS_CXX11 2016-10-26 15:53:13 +02:00
Gael Guennebaud
ca6a2a5248 Fix warning with ICC 2016-10-26 14:13:05 +02:00
Benoit Steiner
5f2dd503ff Replaced tabs with spaces 2016-10-25 20:40:58 -07:00
Benoit Steiner
1644bafe29 Code cleanup 2016-10-25 20:36:14 -07:00
Gael Guennebaud
b15a5dc3f4 Fix ICC warnings 2016-10-25 22:20:24 +02:00
Gael Guennebaud
aad72f3c6d Add missing inline keywords 2016-10-25 20:20:09 +02:00
Benoit Steiner
3e194a6a73 Fixed a typo 2016-10-25 08:42:15 -07:00
Gael Guennebaud
58146be99b bug #1004: one more rewrite of LinSpaced for floating point numbers to guarantee both interpolation and monotonicity.
This version simply does low+i*step plus a branch to return high if i==size-1.
Vectorization is accomplished with a branch and the help of pinsertlast.
Some quick benchmarks revealed that the overhead is really marginal, even when filling small vectors.
2016-10-25 16:53:09 +02:00
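A scalar sketch of the scheme the commit above describes; the real implementation is vectorized and patches the final lane with pinsertlast, so the names here are illustrative only.
  // Scalar sketch: low + i*step, with a branch so that the last entry
  // reproduces 'high' exactly.
  void linspaced_sketch(float* out, int size, float low, float high) {
    const float step = (size > 1) ? (high - low) / float(size - 1) : 0.0f;
    for (int i = 0; i < size; ++i)
      out[i] = (i == size - 1) ? high : low + float(i) * step;
  }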
Gael Guennebaud
13fc18d3a2 Add a pinsertlast function replacing the last entry of a packet by a scalar.
(useful to vectorize LinSpaced)
2016-10-25 16:48:49 +02:00
Gael Guennebaud
2634f9386c bug #1333: fix bad usage of const_cast_derived. Better use .data() for that purpose. 2016-10-24 22:22:35 +02:00
Gael Guennebaud
9e8f07d7b5 Cleanup ArrayWrapper and MatrixWrapper by removing redundant accessors. 2016-10-24 22:16:48 +02:00
Gael Guennebaud
b027d7a8cf bug #1004: remove the inaccurate "sequential" path for LinSpaced, mark the respective function as deprecated, and enforce strict interpolation of the upper bound using a correction term.
Now, even with floating point precision, both the 'low' and 'high' bounds are exactly reproduced at i=0 and i=size-1 respectively.
2016-10-24 20:27:21 +02:00
Benoit Steiner
b11aab5fcc Merged in benoitsteiner/opencl (pull request PR-238)
Added support for OpenCL to the Tensor Module
2016-10-24 15:30:45 +00:00
Gael Guennebaud
53c77061f0 bug #698: rewrite LinSpaced for integer scalar types to avoid overflow and guarantee an even spacing when possible.
Otherwise, the "high" bound is implicitly lowered to the largest value allowing for an even distribution.
This changeset also disables vectorization for this integer path.
2016-10-24 15:50:27 +02:00
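An illustrative scalar sketch of the integer rule described above, not Eigen's actual code: the step is computed with integer division, so the "high" bound is implicitly lowered when the range is not evenly divisible, and a wider intermediate type is assumed here to sidestep overflow.
  #include <cstdint>
  // Illustrative sketch only: even integer spacing; the last value is
  // low + (size-1)*step, which may be below 'high'.
  void linspaced_int_sketch(int* out, int size, int low, int high) {
    const std::int64_t step =
        (size > 1) ? (std::int64_t(high) - std::int64_t(low)) / (size - 1) : 0;
    for (int i = 0; i < size; ++i)
      out[i] = int(std::int64_t(low) + std::int64_t(i) * step);
  }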
Gael Guennebaud
e8e56c7642 Add unit test for overflow in LinSpaced 2016-10-24 15:43:51 +02:00
Gael Guennebaud
40f62974b7 bug #1328: workaround a compilation issue with gcc 4.2 2016-10-20 19:19:37 +02:00
Benoit Steiner
cf20b30d65 Merge latest updates from trunk 2016-10-20 09:42:05 -07:00
Luke Iwanski
03b63e182c Added SYCL include in Tensor. 2016-10-20 15:32:44 +01:00
Benoit Steiner
d3943cd50c Fixed a few typos in the ternary tensor expressions types 2016-10-19 12:56:12 -07:00
Mehdi Goli
8fb162fc85 Fixing the typo regarding missing #if needed for proper handling of exceptions in Eigen/Core. 2016-10-16 12:52:34 +01:00
Mehdi Goli
e36cb91c99 Fixing the code indentation in the TensorReduction.h file. 2016-10-14 18:03:00 +01:00
Luke Iwanski
2e188dd4d4 Merged ComputeCpp to default. 2016-10-14 16:47:40 +01:00
Mehdi Goli
15380f9a87 Applying Benoit's comment to restore the missing line in Eigen/Core 2016-10-14 16:39:41 +01:00
Gael Guennebaud
692b30ca95 Fix previous merge. 2016-10-14 17:16:28 +02:00
Gael Guennebaud
050c681bdd Merged in rmlarsen/eigen2 (pull request PR-232)
Improve performance of parallelized matrix multiply for rectangular matrices
2016-10-14 14:51:09 +00:00
Luke Iwanski
e742da8b28 Merged ComputeCpp into default. 2016-10-14 13:36:51 +01:00
Mehdi Goli
524fa4c46f Reducing the code by generalising sycl backend functions/structs. 2016-10-14 12:09:55 +01:00
Benoit Steiner
737e4152c3 Merged in lukier/eigen (pull request PR-234)
Enabling CUDA in Geometry
2016-10-13 18:09:28 +00:00
Benoit Steiner
d0ee2267d6 Relaxed the resizing checks so that they don't fail with gcc >= 5.3 2016-10-13 10:59:46 -07:00
Robert Lukierski
a94791b69a Fixes for min and abs after Benoit's comments, switched to numext. 2016-10-13 15:00:22 +01:00
Avi Ginsburg
ac63d6891c Patch to allow VS2015 & CUDA 8.0 to compile with Eigen included. I'm not sure
whether to limit the check to this compiler combination
(` || (EIGEN_COMP_MSVC == 1900 &&  __CUDACC_VER__) `)
or to leave it as it is. I also don't know if this will have any effect on
including Eigen in device code (I'm not in my current project).
2016-10-13 08:47:32 +00:00
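A hedged sketch of the kind of guard the message refers to: EIGEN_COMP_MSVC and __CUDACC_VER__ are real macros (MSVC 1900 corresponds to VS2015, and __CUDACC_VER__ is set by CUDA 8's nvcc), but which code path the actual patch gates is not reproduced here.
  // Hypothetical guard, for illustration only.
  #if defined(__CUDACC_VER__) && (EIGEN_COMP_MSVC == 1900)
    // workaround path for VS2015 + CUDA 8.0
  #else
    // regular path
  #endif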
Benoit Steiner
7e4a6754b2 Merged eigen/eigen into default 2016-10-12 22:42:33 -07:00
Benoit Steiner
38b6048e14 Deleted redundant implementation of predux 2016-10-12 14:37:56 -07:00
Gael Guennebaud
e74612b9a0 Remove double ;; 2016-10-12 22:49:47 +02:00
Benoit Steiner
78d2926508 Merged eigen/eigen into default 2016-10-12 13:46:29 -07:00
Benoit Steiner
2e2f48e30e Take advantage of AVX512 instructions whenever possible to speedup the processing of 16 bit floats. 2016-10-12 13:45:39 -07:00
Gael Guennebaud
f939c351cb Fix SPQR for rectangular matrices 2016-10-12 22:39:33 +02:00
Gael Guennebaud
091d373ee9 Fix outer-stride. 2016-10-12 21:47:52 +02:00
Robert Lukierski
471075f7ad Fixes min() warnings. 2016-10-12 18:59:05 +01:00
Gael Guennebaud
5c366fe1d7 Merged in rmlarsen/eigen (pull request PR-230)
Fix a bug in psqrt for SSE and AVX when EIGEN_FAST_MATH=1
2016-10-12 16:30:51 +00:00
Robert Lukierski
86711497c4 Adding EIGEN_DEVICE_FUNC in the Geometry module.
Additional CUDA necessary fixes in the Core (mostly usage of
EIGEN_USING_STD_MATH).
2016-10-12 16:35:17 +01:00
Rasmus Munk Larsen
47150af1c8 Fix copy-paste error: Must use _mm256_cmp_ps for AVX. 2016-10-12 08:34:39 -07:00
Gael Guennebaud
89e315152c bug #1325: fix compilation on NEON with clang 2016-10-12 16:55:47 +02:00
Benoit Steiner
7f0599b6eb Manually define int16_t and uint16_t when compiling with Visual Studio 2016-10-08 22:56:32 -07:00
Benoit Steiner
5727e4d89c Reenabled the use of variadic templates on tegra x1, provided that the latest version (i.e. JetPack 2.3) is used. 2016-10-08 22:19:03 +00:00
Benoit Steiner
5266ff8966 Cleaned up a regression test 2016-10-08 19:12:44 +00:00
Benoit Steiner
5c68051cd7 Merge the content of the ComputeCpp branch into the default branch 2016-10-07 11:04:16 -07:00
Gael Guennebaud
4860727ac2 Remove static qualifier of free-functions (inline is enough and this helps ICC to find the right overload) 2016-10-07 09:21:12 +02:00
Benoit Steiner
507b661106 Renamed predux_half into predux_downto4 2016-10-06 17:57:04 -07:00
Benoit Steiner
a498ff7df6 Fixed incorrect comment 2016-10-06 15:27:27 -07:00
Benoit Steiner
8ba3c41fcf Reverted unnecessary change 2016-10-06 15:12:15 -07:00
Benoit Steiner
a7473d6d5a Fixed compilation error with gcc >= 5.3 2016-10-06 14:33:22 -07:00
Benoit Steiner
5e64cea896 Silenced a compilation warning 2016-10-06 14:24:17 -07:00
Benoit Steiner
33fba3f08d Merged in rryan/eigen/tensorfunctors (pull request PR-233)
Fully support complex types in SumReducer and MeanReducer when building for CUDA by using scalar_sum_op and scalar_product_op instead of operator+ and operator*.
2016-10-06 12:29:19 -07:00
RJ Ryan
bfc264abe8 Add a test that GPU complex product reductions match CPU reductions. 2016-10-06 11:10:14 -07:00
RJ Ryan
e2e9cdd169 Fully support complex types in SumReducer and MeanReducer when building for CUDA by using scalar_sum_op and scalar_product_op instead of operator+ and operator*. 2016-10-06 10:49:48 -07:00
Benoit Steiner
d485d12c51 Added missing AVX intrinsics for fp16: in particular, implemented predux which is required by the matrix-vector code. 2016-10-06 10:41:03 -07:00
Rasmus Munk Larsen
48c635e223 Add a simple cost model to prevent Eigen's parallel GEMM from using too many threads when the inner dimension is small.
Timing for square matrices is unchanged, but both CPU and Wall time are significantly improved for skinny matrices. The benchmarks below are for multiplying NxK * KxN matrices with test names of the form BM_OuterishProd/N/K.

Improvements in Wall time:

Run on [redacted] (12 X 3501 MHz CPUs); 2016-10-05T17:40:02.462497196-07:00
CPU: Intel Haswell with HyperThreading (6 cores) dL1:32KB dL2:256KB dL3:15MB
Benchmark                          Base (ns)  New (ns) Improvement
------------------------------------------------------------------
BM_OuterishProd/64/1                    3088      1610    +47.9%
BM_OuterishProd/64/4                    3562      2414    +32.2%
BM_OuterishProd/64/32                   8861      7815    +11.8%
BM_OuterishProd/128/1                  11363      6504    +42.8%
BM_OuterishProd/128/4                  11128      9794    +12.0%
BM_OuterishProd/128/64                 27691     27396     +1.1%
BM_OuterishProd/256/1                  33214     28123    +15.3%
BM_OuterishProd/256/4                  34312     36818     -7.3%
BM_OuterishProd/256/128               174866    176398     -0.9%
BM_OuterishProd/512/1                7963684    104224    +98.7%
BM_OuterishProd/512/4                7987913    112867    +98.6%
BM_OuterishProd/512/256              8198378   1306500    +84.1%
BM_OuterishProd/1k/1                 7356256    324432    +95.6%
BM_OuterishProd/1k/4                 8129616    331621    +95.9%
BM_OuterishProd/1k/512              27265418   7517538    +72.4%

Improvements in CPU time:

Run on [redacted] (12 X 3501 MHz CPUs); 2016-10-05T17:40:02.462497196-07:00
CPU: Intel Haswell with HyperThreading (6 cores) dL1:32KB dL2:256KB dL3:15MB
Benchmark                          Base (ns)  New (ns) Improvement
------------------------------------------------------------------
BM_OuterishProd/64/1                    6169      1608    +73.9%
BM_OuterishProd/64/4                    7117      2412    +66.1%
BM_OuterishProd/64/32                  17702     15616    +11.8%
BM_OuterishProd/128/1                  45415      6498    +85.7%
BM_OuterishProd/128/4                  44459      9786    +78.0%
BM_OuterishProd/128/64                110657    109489     +1.1%
BM_OuterishProd/256/1                 265158     28101    +89.4%
BM_OuterishProd/256/4                 274234    183885    +32.9%
BM_OuterishProd/256/128              1397160   1408776     -0.8%
BM_OuterishProd/512/1               78947048    520703    +99.3%
BM_OuterishProd/512/4               86955578   1349742    +98.4%
BM_OuterishProd/512/256             74701613  15584661    +79.1%
BM_OuterishProd/1k/1                78352601   3877911    +95.1%
BM_OuterishProd/1k/4                78521643   3966221    +94.9%
BM_OuterishProd/1k/512              258104736  89480530    +65.3%
2016-10-06 10:33:10 -07:00
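A rough, hypothetical sketch of such a cost model, not Eigen's actual heuristic or API: cap the thread count by the amount of work per thread so that products with a small inner dimension are not over-parallelized.
  // Hypothetical cost model, for illustration only.
  inline int gemm_thread_count(long m, long n, long k, int max_threads) {
    const double flops = 2.0 * double(m) * double(n) * double(k);
    const double min_flops_per_thread = 1e5;   // assumed tuning constant
    double t = flops / min_flops_per_thread;
    if (t < 1.0) t = 1.0;
    return t < double(max_threads) ? int(t) : max_threads;
  }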
Benoit Steiner
9f3276981c Enabling AVX512 should also enable AVX2. 2016-10-06 10:29:48 -07:00
Gael Guennebaud
80b5133789 Fix compilation of qr.inverse() for column and full pivoting variants. 2016-10-06 09:55:50 +02:00
Benoit Steiner
4131074818 Deleted unnecessary CMakeLists.txt file 2016-10-05 18:54:35 -07:00
Benoit Steiner
cb5cd69872 Silenced a compilation warning. 2016-10-05 18:50:53 -07:00
Benoit Steiner
78b569f685 Merged latest updates from trunk 2016-10-05 18:48:55 -07:00
Benoit Steiner
9c2b6c049b Silenced a few compilation warnings 2016-10-05 18:37:31 -07:00
Benoit Steiner
6f3cd529af Pulled latest updates from trunk 2016-10-05 18:31:43 -07:00
Benoit Steiner
d7f9679a34 Fixed a couple of compilation warnings 2016-10-05 15:00:32 -07:00
Benoit Steiner
ae1385c7e4 Pull the latest updates from trunk 2016-10-05 14:54:36 -07:00
Benoit Steiner
73b0012945 Fixed compilation warnings 2016-10-05 14:24:24 -07:00
Benoit Steiner
c84084c0c0 Fixed compilation warning 2016-10-05 14:15:41 -07:00
Benoit Steiner
4387433acf Increased the robustness of the reduction tests on fp16 2016-10-05 10:42:41 -07:00
Benoit Steiner
aad20d700d Increase the tolerance to numerical noise. 2016-10-05 10:39:24 -07:00
Benoit Steiner
8b69d5d730 ::rand() returns a signed integer on win32 2016-10-05 08:55:02 -07:00
Benoit Steiner
ed7a220b04 Fixed a typo that impacts windows builds 2016-10-05 08:51:31 -07:00
Benoit Steiner
ceee1c008b Silenced compilation warning 2016-10-04 18:47:53 -07:00
Benoit Steiner
698ff69450 Properly characterize the CUDA packet primitives for fp16 as device only 2016-10-04 16:53:30 -07:00
Rasmus Munk Larsen
7f67e6dfdb Update comment for fast sqrt. 2016-10-04 15:09:11 -07:00
Rasmus Munk Larsen
765615609d Update comment for fast sqrt. 2016-10-04 15:08:41 -07:00
Rasmus Munk Larsen
3ed67cb0bb Fix a bug in the implementation of Carmack's fast sqrt algorithm in Eigen (enabled by EIGEN_FAST_MATH), which causes the vectorized parts of the computation to return -0.0 instead of NaN for negative arguments.
Benchmark speed in Giga-sqrts/s
Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
-----------------------------------------
                    SSE        AVX
Fast=1              2.529G     4.380G
Fast=0              1.944G     1.898G
Fast=1 fixed        2.214G     3.739G

This table illustrates the worst case in terms of speed impact: it was measured by repeatedly computing the sqrt of an n=4096 float vector that fits in L1 cache. For large vectors the operation becomes memory bound and the differences between the versions become almost negligible.
2016-10-04 14:22:56 -07:00
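A scalar model of the behaviour described above; the actual fix lives in the vectorized SSE/AVX psqrt kernels and uses a compare/blend mask, so the code below is illustrative only.
  #include <cmath>
  #include <limits>
  // Scalar illustration: bypass the rsqrt-based fast path for
  // non-positive inputs so negatives yield NaN rather than -0.0.
  inline float fast_sqrt_sketch(float x) {
    if (x < 0.0f) return std::numeric_limits<float>::quiet_NaN();
    if (x == 0.0f) return 0.0f;          // rsqrt(0) would give +inf
    return x * (1.0f / std::sqrt(x));    // stand-in for the rsqrt fast path
  }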
Benoit Steiner
6af5ac7e27 Cleanup the cuda executor code. 2016-10-04 08:52:13 -07:00
Benoit Steiner
2f6d1607c8 Cleaned up the random number generation code. 2016-10-04 08:38:23 -07:00
Benoit Steiner
881b90e984 Use explicit type casting to generate packets of zeros. 2016-10-04 08:23:38 -07:00
Benoit Steiner
616a7a1912 Improved support for compiling CUDA code with clang as the host compiler 2016-10-03 17:09:33 -07:00
Benoit Steiner
409e887d78 Added support for constant std::complex numbers on GPU 2016-10-03 11:06:24 -07:00
Gael Guennebaud
9d6d0dff8f bug #1317: fix performance regression with some Block expressions and clang by helping it to remove dead code.
The trick is to get rid of the nested expression in the evaluator by copying only the required information (here, the strides).
2016-10-01 15:37:00 +02:00
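A deliberately simplified, hypothetical illustration of the trick described above: the evaluator keeps only the data pointer and stride it actually needs rather than the whole nested Block expression, which makes it easier for clang to eliminate dead code.
  // Hypothetical simplification, not Eigen's actual evaluator.
  struct block_evaluator_sketch {
    const float* data;   // start of the block
    long outer_stride;   // stride taken from the underlying expression
    float coeff(long row, long col) const {
      return data[col * outer_stride + row];   // column-major access
    }
  };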
Gael Guennebaud
8b84801f7f bug #1310: workaround a compilation regression from 3.2 regarding triangular * homogeneous 2016-09-30 22:49:59 +02:00
Benoit Steiner
422530946f Renamed the SYCL tests to follow the standard naming convention. 2016-09-30 08:22:10 -07:00
Gael Guennebaud
67b4f45836 Fix angle range 2016-09-30 12:46:33 +02:00
Gael Guennebaud
27f3970453 Remove std:: prefix 2016-09-30 12:40:41 +02:00
Gael Guennebaud
3860a0bc8f bug #1312: Quaternion to AxisAngle conversion now ensures the angle will be in the range [-pi,pi]. This also increases accuracy when q.w is negative. 2016-09-29 23:23:35 +02:00
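A small usage sketch of the conversion that fix concerns; the quaternion below is an arbitrary unit quaternion with a negative w chosen purely for illustration.
  #include <Eigen/Geometry>
  // After bug #1312 the reported angle stays within [-pi, pi], which
  // also improves accuracy when q.w() is negative.
  Eigen::Quaterniond q(-0.5, 0.5, 0.5, 0.5);   // (w, x, y, z), unit norm
  Eigen::AngleAxisd aa(q);
  double angle = aa.angle();
  Eigen::Vector3d axis = aa.axis();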
Gael Guennebaud
33500050c3 bug #1308: fix compilation of some small products involving nullary-expressions. 2016-09-29 09:40:44 +02:00
Benoit Steiner
27d7628f16 Updated the list of warnings to reflect the new message ids introduced in cuda 8.0 2016-09-28 17:42:59 -07:00
Benoit Steiner
2bda1b0d93 Updated the tensor sum and mean reducer to enable them to process complex numbers on cuda gpus. 2016-09-28 17:08:41 -07:00
Mehdi Goli
dd602e62c8 Converting alias template to nested struct in order to be compatible with CXX-03 2016-09-27 16:21:19 +01:00
Gael Guennebaud
f3a00dd2b5 Merged in sergiu/eigen (pull request PR-229)
Disabled MSVC level 4 warning C4714
2016-09-27 09:28:08 +02:00
Gael Guennebaud
892afb9416 Add debug info. 2016-09-26 23:53:57 +02:00
Gael Guennebaud
779774f98c bug #1311: fix alignment logic in some cases of (scalar*small).lazyProduct(small) 2016-09-26 23:53:40 +02:00
Benoit Steiner
6565f8d60f Made the initialization of a CUDA device thread safe. 2016-09-26 11:00:32 -07:00
Gael Guennebaud
48dfe98abd bug #1308: fix compilation of vector * rowvector::nullary. 2016-09-25 14:54:35 +02:00
Sergiu Deitsch
fe29157d02 disabled MSVC level 4 warning C4714
The level 4 warning (/W4) warns about functions marked as __forceinline not
inlined, and generates a lot of noise.
2016-09-25 14:25:47 +02:00
Benoit Steiner
f6ac51a054 Made TensorEvalTo compatible with c++0x again. 2016-09-23 16:45:17 -07:00
Benoit Steiner
00d4e65f00 Deleted unused TensorMap data member 2016-09-23 16:44:45 -07:00
Gael Guennebaud
86caba838d bug #1304: fix Projective * scaling and Projective *= scaling 2016-09-23 13:41:21 +02:00
Gael Guennebaud
b9f7a17e47 Add missing file. 2016-09-23 10:26:08 +02:00
Benoit Steiner
1301d744f8 Made the gaussian generator usable on GPU 2016-09-22 19:04:44 -07:00
Benoit Steiner
2a69290ddb Added a specialization of Eigen::numext::real and Eigen::numext::imag for std::complex<T> to be used when compiling a cuda kernel. This is unfortunately necessary to be able to process complex numbers from a CUDA kernel on MacOS. 2016-09-22 15:52:23 -07:00
Gael Guennebaud
3946768916 Added tag 3.3-rc1 for changeset 77e27fbeee 2016-09-22 22:38:36 +02:00
Luke Iwanski
c771df6bc3 Updated the owners of the file. 2016-09-19 14:09:25 +01:00
Luke Iwanski
b91e021172 Merged with default. 2016-09-19 14:03:54 +01:00
Luke Iwanski
cb81975714 Partial OpenCL support via SYCL compatible with ComputeCpp CE. 2016-09-19 12:44:13 +01:00
Benoit Steiner
f899e08946 Enabled a number of tests previously disabled by mistake 2016-05-03 14:07:47 -07:00
Benoit Steiner
4c05fb03a3 Merged eigen/eigen into default 2016-05-03 13:15:00 -07:00
Benoit Steiner
577a07a86e Re-enabled the product_small test now that everything compiles correctly. 2016-05-03 13:11:38 -07:00
Benoit Steiner
3b8da4be5a Extended the packetmath test to cover all the alignments made possible by avx512 instructions. 2016-04-29 14:13:43 -07:00
Benoit Steiner
2f28ccbea3 Update the makefile to make the tests compile with gcc 4.9 2016-04-29 14:11:09 -07:00
Benoit Steiner
7a4bd337d9 Resolved merge conflict 2016-04-29 13:42:22 -07:00
Benoit Steiner
07a247dcf4 Pulled latest updates from upstream 2016-04-29 13:41:26 -07:00
Benoit Steiner
fa5a8f055a Implemented palign_impl for AVX512 2016-04-29 13:30:13 -07:00
Benoit Steiner
ef3ac9d05a Fixed the AVX512 packet traits 2016-04-29 13:28:36 -07:00
Benoit Steiner
d7b75e8d86 Added pdiv packet primitives for avx512 2016-04-29 13:26:47 -07:00
Benoit Steiner
5e89ded685 Implemented preduxp for AVX512 2016-04-29 13:00:33 -07:00
Benoit Steiner
5f85662ad8 Implemented the pabs and preverse primitives for avx512. 2016-04-29 12:53:34 -07:00
Benoit Steiner
d37ee89ca8 Disabled some of the AVX512 primitives on compilers that don't support them 2016-04-29 12:50:29 -07:00
Benoit Steiner
8bfe739cd2 Updated the AVX512 PacketMath to properly leverage the AVX512DQ instructions 2016-04-11 18:40:16 -07:00
Benoit Steiner
d6e596174d Pull latest updates from upstream 2016-04-11 17:20:17 -07:00
Benoit Steiner
3ca1ae2bb7 Commented out the version of pexp<Packet8d> since it fails to compile with gcc 5.3 2016-02-04 13:49:06 -08:00
Benoit Steiner
23f69ab936 Added implementations of pexp, plog, psqrt, and prsqrt optimized for AVX512 2016-02-04 10:36:36 -08:00
Benoit Steiner
6c9cf117c1 Fixed indentation 2016-02-04 10:34:10 -08:00
Benoit Steiner
d93b71a301 Updated the packetmath test to call predux_half instead of predux4 2016-02-01 15:18:33 -08:00
Benoit Steiner
ef66f2887b Updated the matrix multiplication code to make it compile with AVX512 enabled. 2016-02-01 14:38:05 -08:00
Benoit Steiner
85b6d82b49 Generalized predux4 to support AVX512 packets, and renamed it predux_half.
Disabled the implementation of pabs for avx512 since the corresponding intrinsics are not shipped with gcc
2016-02-01 14:35:51 -08:00
Benoit Steiner
c1a42c2d0d Don't disable the AVX implementations of plset when compiling with AVX512 enabled 2016-01-14 17:21:39 -08:00
Benoit Steiner
0366478df8 Added alignment requirement to the AVX512 packet traits. 2016-01-14 17:02:39 -08:00
Benoit Steiner
3cfd16f3af Fixed the signature of the plset primitives for AVX512 2016-01-14 16:58:01 -08:00
Benoit Steiner
67f44365ea Fixed the AVX512 signature of the ptranspose primitives 2016-01-14 16:51:11 -08:00
Benoit Steiner
a282eb1363 pscatter/pgather use Index instead of int to specify the stride 2016-01-14 16:39:39 -08:00
Benoit Steiner
7832485575 Deleted unnecessary commas and semicolons 2016-01-14 16:36:29 -08:00
Benoit Steiner
99093c0fe0 Added support for AVX512 to the build files 2016-01-05 10:02:49 -08:00
Benoit Steiner
9f9d8d2f62 Disabled part of the matrix matrix peeling code that's incompatible with 512 bit registers 2015-12-21 13:04:52 -08:00
Benoit Steiner
b74887d5f2 Implemented most of the packet primitives for AVX512 2015-12-21 11:46:36 -08:00
Benoit Steiner
6ffb208c77 Make sure EIGEN_HAS_MM_MALLOC is set to 1 when using the avx512 instruction set. 2015-12-21 11:23:15 -08:00
Benoit Steiner
994d1c60b9 Free memory allocated using posix_memalign() with free() instead of std::free() 2015-12-21 11:21:39 -08:00
Benoit Steiner
b8861b0c25 Make sure the data is aligned on a 64 byte boundary when using avx512 instructions. 2015-12-11 09:19:57 -08:00
Benoit Steiner
9a415fb1e2 Preliminary support for AVX512 2015-12-10 15:34:57 -08:00
160 changed files with 8621 additions and 1949 deletions

View File

@@ -133,7 +133,6 @@ if(NOT MSVC)
if(COMPILER_SUPPORT_WERROR)
set(CMAKE_REQUIRED_FLAGS "-Werror")
endif()
ei_add_cxx_compiler_flag("-pedantic")
ei_add_cxx_compiler_flag("-Wall")
ei_add_cxx_compiler_flag("-Wextra")
@@ -231,6 +230,12 @@ if(NOT MSVC)
message(STATUS "Enabling FMA in tests/examples")
endif()
option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF)
if(EIGEN_TEST_AVX512)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -fabi-version=6 -DEIGEN_ENABLE_AVX512")
message(STATUS "Enabling AVX512 in tests/examples")
endif()
option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF)
if(EIGEN_TEST_F16C)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
@@ -375,7 +380,7 @@ else()
)
endif()
set(CMAKEPACKAGE_INSTALL_DIR
"${CMAKE_INSTALL_LIBDIR}/cmake/eigen3"
"${CMAKE_INSTALL_DATADIR}/eigen3/cmake"
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
)
set(PKGCONFIG_INSTALL_DIR
@@ -431,6 +436,13 @@ else()
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
# add SYCL
option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
if(EIGEN_TEST_SYCL)
set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}")
include(FindComputeCpp)
endif()
add_subdirectory(unsupported)
add_subdirectory(demos EXCLUDE_FROM_ALL)
@@ -495,18 +507,89 @@ set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
set ( EIGEN_DEFINITIONS "")
set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" )
set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
@ONLY ESCAPE_QUOTES
)
# Interface libraries require at least CMake 3.0
if (NOT CMAKE_VERSION VERSION_LESS 3.0)
include (CMakePackageConfigHelpers)
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
)
# Imported target support
add_library (eigen INTERFACE)
target_compile_definitions (eigen INTERFACE ${EIGEN_DEFINITIONS})
target_include_directories (eigen INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<INSTALL_INTERFACE:${INCLUDE_INSTALL_DIR}>
)
# Export as title case Eigen
set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen)
install (TARGETS eigen EXPORT Eigen3Targets)
configure_package_config_file (
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
)
# Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does
# not depend on architecture specific settings or libraries. More
# specifically, an Eigen3Config.cmake generated from a 64 bit target can be
# used for 32 bit targets as well (and vice versa).
set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P})
unset (CMAKE_SIZEOF_VOID_P)
write_basic_package_version_file (Eigen3ConfigVersion.cmake
VERSION ${EIGEN_VERSION_NUMBER} COMPATIBILITY SameMajorVersion)
set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P})
# The Eigen target will be located in the Eigen3 namespace. Other CMake
# targets can refer to it using Eigen3::Eigen.
export (TARGETS eigen NAMESPACE Eigen3:: FILE Eigen3Targets.cmake)
# Export Eigen3 package to CMake registry such that it can be easily found by
# CMake even if it has not been installed to a standard directory.
export (PACKAGE Eigen3)
install (EXPORT Eigen3Targets NAMESPACE Eigen3:: DESTINATION
${CMAKEPACKAGE_INSTALL_DIR})
install (FILES
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR})
else (NOT CMAKE_VERSION VERSION_LESS 3.0)
# Fallback to legacy Eigen3Config.cmake without the imported target
# If CMakePackageConfigHelpers module is available (CMake >= 2.8.8)
# create a relocatable Config file, otherwise leave the hardcoded paths
include(CMakePackageConfigHelpers OPTIONAL RESULT_VARIABLE CPCH_PATH)
if(CPCH_PATH)
configure_package_config_file (
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
)
else()
# The PACKAGE_* variables are defined by the configure_package_config_file
# but without it we define them manually to the hardcoded paths
set(PACKAGE_INIT "")
set(PACKAGE_EIGEN_INCLUDE_DIR ${EIGEN_INCLUDE_DIR})
set(PACKAGE_EIGEN_ROOT_DIR ${EIGEN_ROOT_DIR})
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
@ONLY ESCAPE_QUOTES
)
endif()
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
)
endif (NOT CMAKE_VERSION VERSION_LESS 3.0)
# Add uninstall target
add_custom_target ( uninstall

View File

@@ -4,14 +4,10 @@
## # The following are required to uses Dart and the Cdash dashboard
## ENABLE_TESTING()
## INCLUDE(CTest)
set(CTEST_PROJECT_NAME "Eigen")
set(CTEST_PROJECT_NAME "Eigen3.3")
set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
set(CTEST_DROP_METHOD "http")
set(CTEST_DROP_SITE "manao.inria.fr")
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.3")
set(CTEST_DROP_SITE_CDASH TRUE)
set(CTEST_PROJECT_SUBPROJECTS
Official
Unsupported
)

View File

@@ -14,9 +14,9 @@
// first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableStupidWarnings.h"
// Handle NVCC/CUDA
#ifdef __CUDACC__
// Do not try asserts on CUDA!
// Handle NVCC/CUDA/SYCL
#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
// Do not try asserts on CUDA and SYCL!
#ifndef EIGEN_NO_DEBUG
#define EIGEN_NO_DEBUG
#endif
@@ -25,17 +25,24 @@
#undef EIGEN_INTERNAL_DEBUGGING
#endif
// Do not try to vectorize on CUDA!
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
#endif
#ifdef EIGEN_EXCEPTIONS
#undef EIGEN_EXCEPTIONS
#endif
// All functions callable from CUDA code must be qualified with __device__
#define EIGEN_DEVICE_FUNC __host__ __device__
#ifdef __CUDACC__
// Do not try to vectorize on CUDA and SYCL!
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
#endif
#define EIGEN_DEVICE_FUNC __host__ __device__
// We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro
// works properly on the device side
#include <math_functions.hpp>
#else
#define EIGEN_DEVICE_FUNC
#endif
#else
#define EIGEN_DEVICE_FUNC
@@ -51,7 +58,7 @@
#define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
#endif
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS)
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
#define EIGEN_EXCEPTIONS
#endif
@@ -140,6 +147,15 @@
#ifdef __FMA__
#define EIGEN_VECTORIZE_FMA
#endif
#if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512)
#define EIGEN_VECTORIZE_AVX512
#define EIGEN_VECTORIZE_AVX2
#define EIGEN_VECTORIZE_AVX
#define EIGEN_VECTORIZE_FMA
#ifdef __AVX512DQ__
#define EIGEN_VECTORIZE_AVX512DQ
#endif
#endif
// include files
@@ -171,7 +187,7 @@
#ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_AVX
#if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
#include <immintrin.h>
#endif
#endif
@@ -271,7 +287,9 @@
namespace Eigen {
inline static const char *SimdInstructionSetsInUse(void) {
#if defined(EIGEN_VECTORIZE_AVX)
#if defined(EIGEN_VECTORIZE_AVX512)
return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_AVX)
return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_2)
return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
@@ -331,7 +349,12 @@ using std::ptrdiff_t;
#include "src/Core/GenericPacketMath.h"
#include "src/Core/MathFunctionsImpl.h"
#if defined EIGEN_VECTORIZE_AVX
#if defined EIGEN_VECTORIZE_AVX512
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/AVX/PacketMath.h"
#include "src/Core/arch/AVX512/PacketMath.h"
#include "src/Core/arch/AVX512/MathFunctions.h"
#elif defined EIGEN_VECTORIZE_AVX
// Use AVX for floats and doubles, SSE for integers
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/Complex.h"
@@ -359,7 +382,6 @@ using std::ptrdiff_t;
#include "src/Core/arch/ZVector/Complex.h"
#endif
#include "src/Core/arch/CUDA/Complex.h"
// Half float support
#include "src/Core/arch/CUDA/Half.h"
#include "src/Core/arch/CUDA/PacketMathHalf.h"
@@ -379,6 +401,10 @@ using std::ptrdiff_t;
#include "src/Core/functors/StlFunctors.h"
#include "src/Core/functors/AssignmentFunctors.h"
// Specialized functors to enable the processing of complex numbers
// on CUDA devices
#include "src/Core/arch/CUDA/Complex.h"
#include "src/Core/DenseCoeffsBase.h"
#include "src/Core/DenseBase.h"
#include "src/Core/MatrixBase.h"

View File

@@ -351,7 +351,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
Index ret;
if((ret=unblocked(A11))>=0) return k+ret;
if(rs>0) A11.adjoint().template triangularView<Upper>().template solveInPlace<OnTheRight>(A21);
if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,-1); // bottleneck
if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,typename NumTraits<RealScalar>::Literal(-1)); // bottleneck
}
return -1;
}

View File

@@ -14,34 +14,40 @@ namespace Eigen {
namespace internal {
template<typename Scalar, typename CholmodType>
void cholmod_configure_matrix(CholmodType& mat)
{
if (internal::is_same<Scalar,float>::value)
{
mat.xtype = CHOLMOD_REAL;
mat.dtype = CHOLMOD_SINGLE;
}
else if (internal::is_same<Scalar,double>::value)
{
template<typename Scalar> struct cholmod_configure_matrix;
template<> struct cholmod_configure_matrix<double> {
template<typename CholmodType>
static void run(CholmodType& mat) {
mat.xtype = CHOLMOD_REAL;
mat.dtype = CHOLMOD_DOUBLE;
}
else if (internal::is_same<Scalar,std::complex<float> >::value)
{
mat.xtype = CHOLMOD_COMPLEX;
mat.dtype = CHOLMOD_SINGLE;
}
else if (internal::is_same<Scalar,std::complex<double> >::value)
{
};
template<> struct cholmod_configure_matrix<std::complex<double> > {
template<typename CholmodType>
static void run(CholmodType& mat) {
mat.xtype = CHOLMOD_COMPLEX;
mat.dtype = CHOLMOD_DOUBLE;
}
else
{
eigen_assert(false && "Scalar type not supported by CHOLMOD");
}
}
};
// Other scalar types are not yet suppotred by Cholmod
// template<> struct cholmod_configure_matrix<float> {
// template<typename CholmodType>
// static void run(CholmodType& mat) {
// mat.xtype = CHOLMOD_REAL;
// mat.dtype = CHOLMOD_SINGLE;
// }
// };
//
// template<> struct cholmod_configure_matrix<std::complex<float> > {
// template<typename CholmodType>
// static void run(CholmodType& mat) {
// mat.xtype = CHOLMOD_COMPLEX;
// mat.dtype = CHOLMOD_SINGLE;
// }
// };
} // namespace internal
@@ -49,11 +55,11 @@ void cholmod_configure_matrix(CholmodType& mat)
* Note that the data are shared.
*/
template<typename _Scalar, int _Options, typename _StorageIndex>
cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> > mat)
{
cholmod_sparse res;
res.nzmax = mat.nonZeros();
res.nrow = mat.rows();;
res.nrow = mat.rows();
res.ncol = mat.cols();
res.p = mat.outerIndexPtr();
res.i = mat.innerIndexPtr();
@@ -88,7 +94,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
}
// setup res.xtype
internal::cholmod_configure_matrix<_Scalar>(res);
internal::cholmod_configure_matrix<_Scalar>::run(res);
res.stype = 0;
@@ -98,7 +104,14 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
template<typename _Scalar, int _Options, typename _Index>
const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat)
{
cholmod_sparse res = viewAsCholmod(mat.const_cast_derived());
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
return res;
}
template<typename _Scalar, int _Options, typename _Index>
const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat)
{
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
return res;
}
@@ -107,7 +120,7 @@ const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>&
template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>
cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
{
cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived());
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.matrix().const_cast_derived()));
if(UpLo==Upper) res.stype = 1;
if(UpLo==Lower) res.stype = -1;
@@ -131,7 +144,7 @@ cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)
res.x = (void*)(mat.derived().data());
res.z = 0;
internal::cholmod_configure_matrix<Scalar>(res);
internal::cholmod_configure_matrix<Scalar>::run(res);
return res;
}
@@ -180,14 +193,16 @@ class CholmodBase : public SparseSolverBase<Derived>
CholmodBase()
: m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
{
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
cholmod_start(&m_cholmod);
}
explicit CholmodBase(const MatrixType& matrix)
: m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
{
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
cholmod_start(&m_cholmod);
compute(matrix);
}
@@ -254,7 +269,7 @@ class CholmodBase : public SparseSolverBase<Derived>
eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod);
// If the factorization failed, minor is the column at which it did. On success minor == n.
this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
m_factorizationIsOk = true;
@@ -290,8 +305,8 @@ class CholmodBase : public SparseSolverBase<Derived>
}
/** \internal */
template<typename RhsScalar, int RhsOptions, typename RhsIndex, typename DestScalar, int DestOptions, typename DestIndex>
void _solve_impl(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
template<typename RhsDerived, typename DestDerived>
void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const
{
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
const Index size = m_cholmodFactor->n;
@@ -299,7 +314,8 @@ class CholmodBase : public SparseSolverBase<Derived>
eigen_assert(size==b.rows());
// note: cs stands for Cholmod Sparse
cholmod_sparse b_cs = viewAsCholmod(b);
Ref<SparseMatrix<typename RhsDerived::Scalar,ColMajor,typename RhsDerived::StorageIndex> > b_ref(b.const_cast_derived());
cholmod_sparse b_cs = viewAsCholmod(b_ref);
cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod);
if(!x_cs)
{
@@ -307,7 +323,7 @@ class CholmodBase : public SparseSolverBase<Derived>
return;
}
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs);
cholmod_free_sparse(&x_cs, &m_cholmod);
}
#endif // EIGEN_PARSED_BY_DOXYGEN
@@ -324,7 +340,7 @@ class CholmodBase : public SparseSolverBase<Derived>
*/
Derived& setShift(const RealScalar& offset)
{
m_shiftOffset[0] = offset;
m_shiftOffset[0] = double(offset);
return derived();
}
@@ -386,7 +402,7 @@ class CholmodBase : public SparseSolverBase<Derived>
protected:
mutable cholmod_common m_cholmod;
cholmod_factor* m_cholmodFactor;
RealScalar m_shiftOffset[2];
double m_shiftOffset[2];
mutable ComputationInfo m_info;
int m_factorizationIsOk;
int m_analysisIsOk;
@@ -410,6 +426,8 @@ class CholmodBase : public SparseSolverBase<Derived>
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
* \warning Only double precision real and complex scalar types are supported by Cholmod.
*
* \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT
*/
template<typename _MatrixType, int _UpLo = Lower>
@@ -459,6 +477,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
* \warning Only double precision real and complex scalar types are supported by Cholmod.
*
* \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT
*/
template<typename _MatrixType, int _UpLo = Lower>
@@ -506,6 +526,8 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
* \warning Only double precision real and complex scalar types are supported by Cholmod.
*
* \sa \ref TutorialSparseSolverConcept
*/
template<typename _MatrixType, int _UpLo = Lower>
@@ -555,6 +577,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
* \warning Only double precision real and complex scalar types are supported by Cholmod.
*
* \sa \ref TutorialSparseSolverConcept
*/
template<typename _MatrixType, int _UpLo = Lower>

View File

@@ -54,6 +54,8 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
using Base::coeffRef;
EIGEN_DEVICE_FUNC
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -71,66 +73,18 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return m_expression.data(); }
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index rowId, Index colId) const
{
return m_expression.coeff(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index rowId, Index colId)
{
return m_expression.coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return m_expression.coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index)
{
return m_expression.coeffRef(index);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return m_expression.coeffRef(index);
}
template<int LoadMode>
inline const PacketScalar packet(Index rowId, Index colId) const
{
return m_expression.template packet<LoadMode>(rowId, colId);
}
template<int LoadMode>
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
{
m_expression.template writePacket<LoadMode>(rowId, colId, val);
}
template<int LoadMode>
inline const PacketScalar packet(Index index) const
{
return m_expression.template packet<LoadMode>(index);
}
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& val)
{
m_expression.template writePacket<LoadMode>(index, val);
}
template<typename Dest>
EIGEN_DEVICE_FUNC
inline void evalTo(Dest& dst) const { dst = m_expression; }
@@ -197,6 +151,8 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
using Base::coeffRef;
EIGEN_DEVICE_FUNC
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -214,66 +170,18 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return m_expression.data(); }
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index rowId, Index colId) const
{
return m_expression.coeff(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index rowId, Index colId)
{
return m_expression.coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return m_expression.derived().coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index)
{
return m_expression.coeffRef(index);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return m_expression.coeffRef(index);
}
template<int LoadMode>
inline const PacketScalar packet(Index rowId, Index colId) const
{
return m_expression.template packet<LoadMode>(rowId, colId);
}
template<int LoadMode>
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
{
m_expression.template writePacket<LoadMode>(rowId, colId, val);
}
template<int LoadMode>
inline const PacketScalar packet(Index index) const
{
return m_expression.template packet<LoadMode>(index);
}
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& val)
{
m_expression.template writePacket<LoadMode>(index, val);
}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<NestedExpressionType>::type&
nestedExpression() const

View File

@@ -407,7 +407,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
: int(Kernel::AssignmentTraits::DstAlignment),
srcAlignment = Kernel::AssignmentTraits::JointAlignment
};
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
@@ -527,7 +527,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
dstAlignment = alignable ? int(requestedAlignment)
: int(Kernel::AssignmentTraits::DstAlignment)
};
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
const Scalar *dst_ptr = kernel.dstDataPtr();
if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
{
// the pointer is not aligend-on scalar, so alignment is not possible
@@ -554,7 +554,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
kernel.assignCoeffByOuterInner(outer, inner);
alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
}
}
};
@@ -683,6 +683,11 @@ public:
: int(DstEvaluatorType::Flags)&RowMajorBit ? inner
: outer;
}
EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
{
return m_dstExpr.data();
}
protected:
DstEvaluatorType& m_dst;
@@ -697,15 +702,21 @@ protected:
***************************************************************************/
template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
typedef evaluator<DstXprType> DstEvaluatorType;
typedef evaluator<SrcXprType> SrcEvaluatorType;
DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
// NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
// we need to resize the destination after the source evaluator has been created.
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
DstEvaluatorType dstEvaluator(dst);
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
@@ -714,7 +725,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX
}
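// A minimal sketch (not from the changeset) of the kind of expression the
// NOTE above refers to. With a rectangular A, the product A*A.transpose()
// has a different shape than A itself, so the assignment must resize the
// destination; doing so only after the source evaluator has been created
// lets the right-hand side read the original coefficients of A:
//
//   Eigen::MatrixXd A = Eigen::MatrixXd::Random(2,5);
//   double s = 2.0;
//   A = (A * A.transpose()) / s;   // A is resized from 2x5 to 2x2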
template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
@@ -796,11 +807,6 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
) && int(Dst::SizeAtCompileTime) != 1
};
Index dstRows = NeedToTranspose ? src.cols() : src.rows();
Index dstCols = NeedToTranspose ? src.rows() : src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
ActualDstType actualDst(dst);
@@ -823,15 +829,11 @@ template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>
@@ -853,8 +855,6 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
#ifndef EIGEN_NO_DEBUG
internal::check_for_aliasing(dst, src);
#endif
@@ -873,9 +873,42 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.evalTo(dst);
}
// NOTE The following two functions are templated to avoid their instantiation if not needed
// This is needed because some expressions support evalTo only and/or have 'void' as scalar type.
template<typename SrcScalarType>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.addTo(dst);
}
template<typename SrcScalarType>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.subTo(dst);
}
};
} // namespace internal


@@ -817,73 +817,79 @@ struct mapbase_evaluator : evaluator_base<Derived>
ColsAtCompileTime = XprType::ColsAtCompileTime,
CoeffReadCost = NumTraits<Scalar>::ReadCost
};
EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
: m_data(const_cast<PointerType>(map.data())),
m_xpr(map)
: m_data(const_cast<PointerType>(map.data())),
m_innerStride(map.innerStride()),
m_outerStride(map.outerStride())
{
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
return m_data[col * colStride() + row * rowStride()];
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{
return m_data[index * m_xpr.innerStride()];
return m_data[index * m_innerStride.value()];
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col)
{
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
return m_data[col * colStride() + row * rowStride()];
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{
return m_data[index * m_xpr.innerStride()];
return m_data[index * m_innerStride.value()];
}
template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
PacketType packet(Index row, Index col) const
{
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
PointerType ptr = m_data + row * rowStride() + col * colStride();
return internal::ploadt<PacketType, LoadMode>(ptr);
}
template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const
PacketType packet(Index index) const
{
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
}
template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x)
void writePacket(Index row, Index col, const PacketType& x)
{
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
PointerType ptr = m_data + row * rowStride() + col * colStride();
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
}
template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x)
void writePacket(Index index, const PacketType& x)
{
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
}
protected:
EIGEN_DEVICE_FUNC
inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); }
EIGEN_DEVICE_FUNC
inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); }
PointerType m_data;
const XprType& m_xpr;
const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
};
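// Editorial sketch (not from the changeset): variable_if_dynamic, used for the
// cached strides above, only stores a runtime value when the compile-time
// constant is Dynamic; for a fixed constant it is an empty object whose
// value() folds back to that constant, so caching fixed strides costs nothing:
//
//   Eigen::internal::variable_if_dynamic<Eigen::Index, 4> fixed(4);               // stateless, value() == 4
//   Eigen::internal::variable_if_dynamic<Eigen::Index, Eigen::Dynamic> dyn(7);    // stores 7, value() == 7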
template<typename PlainObjectType, int MapOptions, typename StrideType>
@@ -1296,7 +1302,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
}
protected:
const ArgTypeNested m_arg;
typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg;
const MemberOp m_functor;
};


@@ -84,6 +84,7 @@ class CwiseBinaryOp :
{
public:
typedef typename internal::remove_all<BinaryOp>::type Functor;
typedef typename internal::remove_all<LhsType>::type Lhs;
typedef typename internal::remove_all<RhsType>::type Rhs;


@@ -215,42 +215,29 @@ DenseBase<Derived>::Constant(const Scalar& value)
return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
}
/**
* \brief Sets a linearly spaced vector.
/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&)
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
* This particular version of LinSpaced() uses sequential access, i.e. vector access is
* assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
* and yields faster code than the random access version.
*
* When size is set to 1, a vector of length 1 containing 'high' is returned.
*
* \only_for_vectors
*
* Example: \include DenseBase_LinSpaced_seq.cpp
* Output: \verbinclude DenseBase_LinSpaced_seq.out
*
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp
* \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,size));
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
}
/**
* \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&)
* Special version for fixed size types which does not require the size parameter.
/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&)
*
* \sa LinSpaced(Scalar,Scalar)
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,Derived::SizeAtCompileTime));
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
}
/**
@@ -264,14 +251,24 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
* Example: \include DenseBase_LinSpaced.cpp
* Output: \verbinclude DenseBase_LinSpaced.out
*
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&,Index), CwiseNullaryOp
* For integer scalar types, an even spacing is possible if and only if the length of the range,
* i.e., \c high-low, is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the
* number of values \c high-low+1 (meaning each value can be repeated the same number of times).
* If neither of these two conditions is satisfied, then \c high is lowered to the largest value
* satisfying one of these constraints.
* Here are some examples:
*
* Example: \include DenseBase_LinSpacedInt.cpp
* Output: \verbinclude DenseBase_LinSpacedInt.out
*
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,size));
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
}
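// A short usage sketch (not from the changeset) of the integer rule documented
// above:
//
//   Eigen::VectorXi v = Eigen::VectorXi::LinSpaced(4, 0, 9);   // 0 3 6 9, since 9-0 is a multiple of 4-1
//   Eigen::VectorXi w = Eigen::VectorXi::LinSpaced(4, 0, 7);   // 7-0 is not, so high is first lowered as described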
/**
@@ -284,7 +281,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,Derived::SizeAtCompileTime));
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
}
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
@@ -377,24 +374,30 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
* Example: \include DenseBase_setLinSpaced.cpp
* Output: \verbinclude DenseBase_setLinSpaced.out
*
* \sa CwiseNullaryOp
* For integer scalar types, do not miss the explanations on the definition
* of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
*
* \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,newSize));
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
}
/**
* \brief Sets a linearly spaced vector.
*
* The function fills *this with equally spaced values in the closed interval [low,high].
* The function fills \c *this with equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned.
*
* \only_for_vectors
*
* \sa setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
* For integer scalar types, do not miss the explanations on the definition
* of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
*
* \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
@@ -752,7 +755,7 @@ struct setIdentity_impl<Derived, true>
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
{
m.setZero();
const Index size = (std::min)(m.rows(), m.cols());
const Index size = numext::mini(m.rows(), m.cols());
for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
return m;
}


@@ -260,10 +260,10 @@ template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,false>,PlainObject> SequentialLinSpacedReturnType;
/** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,true>,PlainObject> RandomAccessLinSpacedReturnType;
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;


@@ -624,7 +624,7 @@ struct first_aligned_impl<Alignment, Derived, false>
{
static inline Index run(const Derived& m)
{
return internal::first_aligned<Alignment>(&m.const_cast_derived().coeffRef(0,0), m.size());
return internal::first_aligned<Alignment>(m.data(), m.size());
}
};


@@ -290,12 +290,11 @@ MatrixBase<Derived>::asDiagonal() const
template<typename Derived>
bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
{
using std::abs;
if(cols() != rows()) return false;
RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
{
RealScalar absOnDiagonal = abs(coeff(j,j));
RealScalar absOnDiagonal = numext::abs(coeff(j,j));
if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
}
for(Index j = 0; j < cols(); ++j)
@@ -321,6 +320,11 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
{
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
dst.setZero();
dst.diagonal() = src.diagonal();
}


@@ -70,9 +70,11 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
#endif
eigen_assert(size() == other.size());
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);


@@ -25,7 +25,8 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
{
enum { is_large = MaxSize == Dynamic ||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
(Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
value = is_large ? Large
: Size == 1 ? 1
: Small
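// Editorial sketch (not from the changeset): the extra clause above makes a
// dynamic-size matrix with a fixed maximum size count as "large" once that
// bound reaches EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD. The 100x100 bound below
// is an arbitrary value assumed to exceed the threshold:
//
//   typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, 0, 100, 100> BoundedMatrix;
//   BoundedMatrix a(80,80), b(80,80), c;
//   c = a * b;   // now dispatched to the blocked, cache-friendly product path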
@@ -264,7 +265,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
if (!evalToDest)
{
if(!alphaIsCompatible)
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
}
@@ -329,6 +330,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
template<typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
// TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
typename nested_eval<Rhs,1>::type actual_rhs(rhs);
const Index size = rhs.rows();
@@ -342,6 +344,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
template<typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
const Index rows = dest.rows();
for(Index i=0; i<rows; ++i)


@@ -329,7 +329,7 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Pack
*/
template<typename Packet> EIGEN_DEVICE_FUNC inline
typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
predux4(const Packet& a)
predux_downto4(const Packet& a)
{ return a; }
/** \internal \returns the product of the elements of \a a*/
@@ -558,6 +558,34 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
return ifPacket.select[0] ? thenPacket : elsePacket;
}
/** \internal \returns \a a with the first coefficient replaced by the scalar b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
{
// Default implementation based on pblend.
// It must be specialized for higher performance.
Selector<unpacket_traits<Packet>::size> mask;
mask.select[0] = true;
// This for loop should be optimized away by the compiler.
for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
mask.select[i] = false;
return pblend(mask, pset1<Packet>(b), a);
}
/** \internal \returns \a a with the last coefficient replaced by the scalar b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
{
// Default implementation based on pblend.
// It must be specialized for higher performance.
Selector<unpacket_traits<Packet>::size> mask;
// This for loop should be optimized away by the compiler.
for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
mask.select[i] = false;
mask.select[unpacket_traits<Packet>::size-1] = true;
return pblend(mask, pset1<Packet>(b), a);
}
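// Usage sketch (not from the changeset), assuming an SSE build where Packet4f
// holds four floats:
//
//   Packet4f a = pset1<Packet4f>(1.f);   // {1, 1, 1, 1}
//   Packet4f b = pinsertfirst(a, 2.f);   // {2, 1, 1, 1} -- first lane replaced
//   Packet4f c = pinsertlast(a, 3.f);    // {1, 1, 1, 3} -- last lane replaced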
} // end namespace internal
} // end namespace Eigen


@@ -97,6 +97,19 @@ struct real_default_impl<Scalar,true>
template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
#ifdef __CUDA_ARCH__
template<typename T>
struct real_impl<std::complex<T> >
{
typedef T RealScalar;
EIGEN_DEVICE_FUNC
static inline T run(const std::complex<T>& x)
{
return x.real();
}
};
#endif
template<typename Scalar>
struct real_retval
{
@@ -132,6 +145,19 @@ struct imag_default_impl<Scalar,true>
template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
#ifdef __CUDA_ARCH__
template<typename T>
struct imag_impl<std::complex<T> >
{
typedef T RealScalar;
EIGEN_DEVICE_FUNC
static inline T run(const std::complex<T>& x)
{
return x.imag();
}
};
#endif
template<typename Scalar>
struct imag_retval
{


@@ -399,12 +399,14 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC
inline PlainObject unitOrthogonal(void) const;
EIGEN_DEVICE_FUNC
inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
: ColsAtCompileTime==1 ? Vertical : Horizontal };
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
EIGEN_DEVICE_FUNC
inline HomogeneousReturnType homogeneous() const;
enum {
@@ -414,7 +416,7 @@ template<typename Derived> class MatrixBase
internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
EIGEN_DEVICE_FUNC
inline const HNormalizedReturnType hnormalized() const;
////////// Householder module ///////////


@@ -763,6 +763,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
{
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
const bool is_integer = NumTraits<T>::IsInteger;
EIGEN_UNUSED_VARIABLE(is_integer);
EIGEN_STATIC_ASSERT(is_integer,
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
resize(size);
@@ -916,8 +917,8 @@ struct conservative_resize_like_impl
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(rows,cols);
const Index common_rows = (std::min)(rows, _this.rows());
const Index common_cols = (std::min)(cols, _this.cols());
const Index common_rows = numext::mini(rows, _this.rows());
const Index common_cols = numext::mini(cols, _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}
@@ -950,8 +951,8 @@ struct conservative_resize_like_impl
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(other);
const Index common_rows = (std::min)(tmp.rows(), _this.rows());
const Index common_cols = (std::min)(tmp.cols(), _this.cols());
const Index common_rows = numext::mini(tmp.rows(), _this.rows());
const Index common_cols = numext::mini(tmp.cols(), _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}


@@ -140,6 +140,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
}
@@ -154,6 +158,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
}
@@ -168,6 +176,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
}
@@ -265,7 +277,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
// FIXME not very good if rhs is real and lhs complex while alpha is real too
const Index cols = dst.cols();
for (Index j=0; j<cols; ++j)
func(dst.col(j), rhsEval.coeff(0,j) * actual_lhs);
func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
}
// Row major result
@@ -278,7 +290,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
// FIXME not very good if lhs is real and rhs complex while alpha is real too
const Index rows = dst.rows();
for (Index i=0; i<rows; ++i)
func(dst.row(i), lhsEval.coeff(i,0) * actual_rhs);
func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
}
template<typename Lhs, typename Rhs>
@@ -354,17 +366,22 @@ template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
{
typedef typename nested_eval<Lhs,1>::type LhsNested;
typedef typename nested_eval<Rhs,1>::type RhsNested;
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
template<typename Dest>
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
LhsNested actual_lhs(lhs);
RhsNested actual_rhs(rhs);
internal::gemv_dense_selector<Side,
(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
>::run(lhs, rhs, dst, alpha);
>::run(actual_lhs, actual_rhs, dst, alpha);
}
};
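// Editorial note (not from the changeset): because LhsNested and RhsNested are
// obtained through nested_eval<.,1> above, a compound operand such as a matrix
// product is evaluated into a temporary once before gemv_dense_selector runs,
// e.g.:
//
//   Eigen::MatrixXd A(64,64), B(64,64);
//   Eigen::VectorXd v(64), y;
//   A.setRandom(); B.setRandom(); v.setRandom();
//   y = (A * B) * v;   // A*B is materialized once, then a single GEMV follows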
@@ -437,6 +454,18 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
#if 0
std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
std::cerr << "Alignment= " << Alignment << "\n";
std::cerr << "Flags= " << Flags << "\n";
#endif
}
// Everything below here is taken from CoeffBasedProduct.h
@@ -503,8 +532,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
: bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
: bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
: 0,
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
@@ -555,8 +584,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
}
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
LhsEtorType m_lhsImpl;
RhsEtorType m_rhsImpl;
@@ -590,7 +619,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res);
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
}
};
@@ -600,7 +629,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
}
};
@@ -609,7 +638,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col));
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
}
};
@@ -618,7 +647,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
}
};
@@ -627,7 +656,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{
res = pset1<Packet>(0);
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
}
};
@@ -636,7 +665,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{
res = pset1<Packet>(0);
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
}
};
@@ -645,7 +674,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
res = pset1<Packet>(0);
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
for(Index i = 0; i < innerDim; ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
}
@@ -656,7 +685,7 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
res = pset1<Packet>(0);
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
for(Index i = 0; i < innerDim; ++i)
res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}


@@ -139,7 +139,11 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
typedef Solve<DecType,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
// FIXME shall we resize dst here?
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
src.dec()._solve_impl(src.rhs(), dst);
}
};
@@ -151,6 +155,11 @@ struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal:
typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
}
};
@@ -163,6 +172,11 @@ struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<t
typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
}
};


@@ -78,6 +78,11 @@ template<typename MatrixType> class Transpose
typename internal::remove_reference<MatrixTypeNested>::type&
nestedExpression() { return m_matrix; }
/** \internal */
void resize(Index nrows, Index ncols) {
m_matrix.resize(ncols,nrows);
}
protected:
typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
};


@@ -641,21 +641,20 @@ MatrixBase<Derived>::triangularView() const
template<typename Derived>
bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
{
using std::abs;
RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
{
Index maxi = (std::min)(j, rows()-1);
Index maxi = numext::mini(j, rows()-1);
for(Index i = 0; i <= maxi; ++i)
{
RealScalar absValue = abs(coeff(i,j));
RealScalar absValue = numext::abs(coeff(i,j));
if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
}
}
RealScalar threshold = maxAbsOnUpperPart * prec;
for(Index j = 0; j < cols(); ++j)
for(Index i = j+1; i < rows(); ++i)
if(abs(coeff(i, j)) > threshold) return false;
if(numext::abs(coeff(i, j)) > threshold) return false;
return true;
}
@@ -667,20 +666,19 @@ bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
template<typename Derived>
bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
{
using std::abs;
RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
for(Index i = j; i < rows(); ++i)
{
RealScalar absValue = abs(coeff(i,j));
RealScalar absValue = numext::abs(coeff(i,j));
if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
}
RealScalar threshold = maxAbsOnLowerPart * prec;
for(Index j = 1; j < cols(); ++j)
{
Index maxi = (std::min)(j, rows()-1);
Index maxi = numext::mini(j, rows()-1);
for(Index i = 0; i < maxi; ++i)
if(abs(coeff(i, j)) > threshold) return false;
if(numext::abs(coeff(i, j)) > threshold) return false;
}
return true;
}
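// Usage sketch (not from the changeset) of the two predicates above:
//
//   Eigen::Matrix3d m;
//   m << 1, 2, 3,
//        0, 4, 5,
//        0, 0, 6;
//   m.isUpperTriangular();   // true
//   m.isLowerTriangular();   // false, e.g. |m(0,1)| = 2 exceeds the threshold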
@@ -777,15 +775,18 @@ public:
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
typedef evaluator<DstXprType> DstEvaluatorType;
typedef evaluator<SrcXprType> SrcEvaluatorType;
DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
DstEvaluatorType dstEvaluator(dst);
typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
@@ -802,7 +803,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
@@ -893,7 +894,7 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
{
for(Index j = 0; j < kernel.cols(); ++j)
{
Index maxi = (std::min)(j, kernel.rows());
Index maxi = numext::mini(j, kernel.rows());
Index i = 0;
if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
{
@@ -938,6 +939,11 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
dst.setZero();
dst._assignProduct(src, 1);
}


@@ -602,7 +602,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
return m_matrix / extendedTo(other.derived());
}
/** \returns an expression where each column of row of the referenced matrix are normalized.
/** \returns an expression where each column (or row) of the referenced matrix is normalized.
* The referenced matrix is \b not modified.
* \sa MatrixBase::normalized(), normalize()
*/
@@ -625,6 +625,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/////////// Geometry module ///////////
typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
EIGEN_DEVICE_FUNC
HomogeneousReturnType homogeneous() const;
typedef typename ExpressionType::PlainObject CrossReturnType;
@@ -654,6 +655,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
HNormalizedReturnType;
EIGEN_DEVICE_FUNC
const HNormalizedReturnType hnormalized() const;
protected:


@@ -456,6 +456,26 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) {
kernel.packet[0].v = tmp;
}
template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
{
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
}
template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
{
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
}
template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
{
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
}
template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
{
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
}
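// Editorial note (not from the changeset) on the blend masks above: each
// complex value occupies two lanes, so replacing the first complex of a
// Packet4cf blends float lanes 0 and 1 (mask 1|2) and replacing the last one
// blends lanes 6 and 7 ((1<<7)|(1<<6)); the Packet2cd masks follow the same
// pattern with double lanes 0,1 and 2,3.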
} // end namespace internal
} // end namespace Eigen


@@ -355,30 +355,27 @@ pexp<Packet4d>(const Packet4d& _x) {
// Functions for sqrt.
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
// exact solution. The main advantage of this approach is not just speed, but
// also the fact that it can be inlined and pipelined with other computations,
// further reducing its effective latency.
// exact solution. It does not handle +inf or denormalized numbers correctly.
// The main advantage of this approach is not just speed, but also the fact that
// it can be inlined and pipelined with other computations, further reducing its
// effective latency. This is similar to Quake3's fast inverse square root.
// For detail see here: http://www.beyond3d.com/content/articles/8/
#if EIGEN_FAST_MATH
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
psqrt<Packet8f>(const Packet8f& _x) {
_EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
_EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
Packet8f neg_half = pmul(_x, p8f_minus_half);
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
Packet8f non_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_GE_OQ);
Packet8f x = _mm256_and_ps(non_zero_mask, _mm256_rsqrt_ps(_x));
Packet8f half = pmul(_x, pset1<Packet8f>(.5f));
Packet8f denormal_mask = _mm256_and_ps(
_mm256_cmp_ps(_x, pset1<Packet8f>((std::numeric_limits<float>::min)()),
_CMP_LT_OQ),
_mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
// Compute approximate reciprocal sqrt.
Packet8f x = _mm256_rsqrt_ps(_x);
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
// Multiply the original _x by it's reciprocal square root to extract the
// square root.
return pmul(_x, x);
x = pmul(x, psub(pset1<Packet8f>(1.5f), pmul(half, pmul(x,x))));
// Flush results for denormals to zero.
return _mm256_andnot_ps(denormal_mask, pmul(_x,x));
}
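// Editorial sketch (not from the changeset) of what the EIGEN_FAST_MATH branch
// above computes, written out in scalar form (requires <cmath>):
//
//   float fast_sqrt_sketch(float x) {
//     float y = 1.0f / std::sqrt(x);       // stand-in for _mm256_rsqrt_ps
//     y = y * (1.5f - 0.5f * x * y * y);   // one Newton step, as in psqrt above
//     return x * y;                        // sqrt(x) = x * rsqrt(x)
//   }
//
// plus the masking shown above so that denormal inputs yield 0 instead of inf.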
#else
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED


@@ -48,7 +48,9 @@ template<> struct is_arithmetic<__m256d> { enum { value = true }; };
#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
const Packet8i p8i_##NAME = pset1<Packet8i>(X)
// Use the packet_traits defined in AVX512/PacketMath.h instead if we're going
// to leverage AVX512 instructions.
#ifndef EIGEN_VECTORIZE_AVX512
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet8f type;
@@ -93,6 +95,7 @@ template<> struct packet_traits<double> : default_packet_traits
HasCeil = 1
};
};
#endif
template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
@@ -304,9 +307,11 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
pstore(to, pa);
}
#ifndef EIGEN_VECTORIZE_AVX512
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
#endif
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
return _mm_cvtss_f32(_mm256_castps256_ps128(a));
@@ -400,7 +405,7 @@ template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
return pfirst(_mm256_hadd_pd(tmp0,tmp0));
}
template<> EIGEN_STRONG_INLINE Packet4f predux4<Packet8f>(const Packet8f& a)
template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
{
return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
}
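// Editorial note (not from the changeset): for a Packet8f {a0,...,a7} the
// specialization above adds the two 128-bit halves lane-wise and returns the
// Packet4f {a0+a4, a1+a5, a2+a6, a3+a7}, so a further 4-wide reduction yields
// the full sum.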
@@ -604,6 +609,26 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
}
template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
{
return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
}
template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
{
return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
}
template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
{
return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
}
template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
{
return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
}
} // end namespace internal
} // end namespace Eigen


@@ -0,0 +1,396 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@gmail.com)
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
namespace Eigen {
namespace internal {
// Disable the code for older versions of gcc that don't support many of the required avx512 intrinsics.
#if EIGEN_GNUC_AT_LEAST(5, 3)
#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
const Packet16f p16f_##NAME = (__m512)pset1<Packet16i>(X)
#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
const Packet8d p8d_##NAME = pset1<Packet8d>(X)
#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
// Natural logarithm
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
// be easily approximated by a polynomial centered on m=1 for stability.
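// Worked example (editorial, not from the changeset): x = 6.0
//   6.0 = 2^2 * 1.5, but 1.5 >= sqrt(2), so shift to e = 3, m = 0.75
//   (0.75 lies in [sqrt(1/2), sqrt(2)))
//   log(6.0) = 3*log(2) + log(0.75) = 2.0794 - 0.2877 = 1.7918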
#if defined(EIGEN_VECTORIZE_AVX512DQ)
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
plog<Packet16f>(const Packet16f& _x) {
Packet16f x = _x;
_EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
_EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet16f(126f, 126.0f);
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inv_mant_mask, ~0x7f800000);
// The smallest non denormalized float number.
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
// Polynomial coefficients.
_EIGEN_DECLARE_CONST_Packet16f(cephes_SQRTHF, 0.707106781186547524f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p0, 7.0376836292E-2f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p1, -1.1514610310E-1f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p2, 1.1676998740E-1f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p3, -1.2420140846E-1f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p4, +1.4249322787E-1f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p5, -1.6668057665E-1f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p6, +2.0000714765E-1f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p7, -2.4999993993E-1f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p8, +3.3333331174E-1f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q1, -2.12194440e-4f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
// invalid_mask is set to true when x is NaN
__mmask16 invalid_mask =
_mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
__mmask16 iszero_mask =
_mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_UQ);
// Truncate input values to the minimum positive normal.
x = pmax(x, p16f_min_norm_pos);
// Extract the shifted exponents.
Packet16f emm0 = _mm512_cvtepi32_ps(_mm512_srli_epi32((__m512i)x, 23));
Packet16f e = _mm512_sub_ps(emm0, p16f_126f);
// Set the exponents to -1, i.e. x are in the range [0.5,1).
x = _mm512_and_ps(x, p16f_inv_mant_mask);
x = _mm512_or_ps(x, p16f_half);
// part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
// and shift by -1. The values are then centered around 0, which improves
// the stability of the polynomial evaluation.
// if( x < SQRTHF ) {
// e -= 1;
// x = x + x - 1.0;
// } else { x = x - 1.0; }
__mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps());
x = psub(x, p16f_1);
e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps()));
x = padd(x, tmp);
Packet16f x2 = pmul(x, x);
Packet16f x3 = pmul(x2, x);
// Evaluate the polynomial approximant of degree 8 in three parts, probably
// to improve instruction-level parallelism.
Packet16f y, y1, y2;
y = pmadd(p16f_cephes_log_p0, x, p16f_cephes_log_p1);
y1 = pmadd(p16f_cephes_log_p3, x, p16f_cephes_log_p4);
y2 = pmadd(p16f_cephes_log_p6, x, p16f_cephes_log_p7);
y = pmadd(y, x, p16f_cephes_log_p2);
y1 = pmadd(y1, x, p16f_cephes_log_p5);
y2 = pmadd(y2, x, p16f_cephes_log_p8);
y = pmadd(y, x3, y1);
y = pmadd(y, x3, y2);
y = pmul(y, x3);
// Add the logarithm of the exponent back to the result of the interpolation.
y1 = pmul(e, p16f_cephes_log_q1);
tmp = pmul(x2, p16f_half);
y = padd(y, y1);
x = psub(x, tmp);
y2 = pmul(e, p16f_cephes_log_q2);
x = padd(x, y);
x = padd(x, y2);
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf,
_mm512_mask_blend_ps(invalid_mask, p16f_nan, x));
}
#endif
// Exponential function. Works by writing "x = m*log(2) + r" where
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
pexp<Packet16f>(const Packet16f& _x) {
_EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
_EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
_EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
_EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
// Clamp x.
Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
// Express exp(x) as exp(m*ln(2) + r), start by extracting
// m = floor(x/ln(2) + 0.5).
Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
// Get r = x - m*ln(2). Note that we can do this without losing more than one
// ulp precision due to the FMA instruction.
_EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
Packet16f r2 = pmul(r, r);
// TODO(gonnet): Split into odd/even polynomials and try to exploit
// instruction-level parallelism.
Packet16f y = p16f_cephes_exp_p0;
y = pmadd(y, r, p16f_cephes_exp_p1);
y = pmadd(y, r, p16f_cephes_exp_p2);
y = pmadd(y, r, p16f_cephes_exp_p3);
y = pmadd(y, r, p16f_cephes_exp_p4);
y = pmadd(y, r, p16f_cephes_exp_p5);
y = pmadd(y, r2, r);
y = padd(y, p16f_1);
// Build emm0 = 2^m.
Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
emm0 = _mm512_slli_epi32(emm0, 23);
// Return 2^m * exp(r).
return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
}
/*template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
pexp<Packet8d>(const Packet8d& _x) {
Packet8d x = _x;
_EIGEN_DECLARE_CONST_Packet8d(1, 1.0);
_EIGEN_DECLARE_CONST_Packet8d(2, 2.0);
_EIGEN_DECLARE_CONST_Packet8d(exp_hi, 709.437);
_EIGEN_DECLARE_CONST_Packet8d(exp_lo, -709.436139303);
_EIGEN_DECLARE_CONST_Packet8d(cephes_LOG2EF, 1.4426950408889634073599);
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p0, 1.26177193074810590878e-4);
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p1, 3.02994407707441961300e-2);
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p2, 9.99999999999999999910e-1);
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q0, 3.00198505138664455042e-6);
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q1, 2.52448340349684104192e-3);
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q2, 2.27265548208155028766e-1);
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q3, 2.00000000000000000009e0);
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C1, 0.693145751953125);
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C2, 1.42860682030941723212e-6);
// clamp x
x = pmax(pmin(x, p8d_exp_hi), p8d_exp_lo);
// Express exp(x) as exp(g + n*log(2)).
const Packet8d n =
_mm512_mul_round_pd(p8d_cephes_LOG2EF, x, _MM_FROUND_TO_NEAREST_INT);
// Get the remainder modulo log(2), i.e. the "g" described above. Subtract
// n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
// digits right.
const Packet8d nC1 = pmul(n, p8d_cephes_exp_C1);
const Packet8d nC2 = pmul(n, p8d_cephes_exp_C2);
x = psub(x, nC1);
x = psub(x, nC2);
const Packet8d x2 = pmul(x, x);
// Evaluate the numerator polynomial of the rational interpolant.
Packet8d px = p8d_cephes_exp_p0;
px = pmadd(px, x2, p8d_cephes_exp_p1);
px = pmadd(px, x2, p8d_cephes_exp_p2);
px = pmul(px, x);
// Evaluate the denominator polynomial of the rational interpolant.
Packet8d qx = p8d_cephes_exp_q0;
qx = pmadd(qx, x2, p8d_cephes_exp_q1);
qx = pmadd(qx, x2, p8d_cephes_exp_q2);
qx = pmadd(qx, x2, p8d_cephes_exp_q3);
// I don't really get this bit, copied from the SSE2 routines, so...
// TODO(gonnet): Figure out what is going on here, perhaps find a better
// rational interpolant?
x = _mm512_div_pd(px, psub(qx, px));
x = pmadd(p8d_2, x, p8d_1);
// Build e=2^n.
const Packet8d e = _mm512_castsi512_pd(_mm512_slli_epi64(
_mm512_add_epi64(_mm512_cvtpd_epi64(n), _mm512_set1_epi64(1023)), 52));
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
// non-finite values in the input.
return pmax(pmul(x, e), _x);
}*/
// Functions for sqrt.
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
// exact solution. The main advantage of this approach is not just speed, but
// also the fact that it can be inlined and pipelined with other computations,
// further reducing its effective latency.
#if EIGEN_FAST_MATH
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
psqrt<Packet16f>(const Packet16f& _x) {
_EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
_EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
Packet16f neg_half = pmul(_x, p16f_minus_half);
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(),
_mm512_rsqrt14_ps(_x));
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
// Multiply the original _x by its reciprocal square root to extract the
// square root.
return pmul(_x, x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
psqrt<Packet8d>(const Packet8d& _x) {
_EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
_EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
Packet8d neg_half = pmul(_x, p8d_minus_half);
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(),
_mm512_rsqrt14_pd(_x));
// Do a first step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
// Do a second step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
// Multiply the original _x by its reciprocal square root to extract the
// square root.
return pmul(_x, x);
}
#else
template <>
EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
return _mm512_sqrt_ps(x);
}
template <>
EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
return _mm512_sqrt_pd(x);
}
#endif
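// Derivation of the refinement used in the EIGEN_FAST_MATH branch above: for
// a > 0, one Newton-Raphson step on f(y) = 1/y^2 - a turns an estimate y of
// 1/sqrt(a) into y' = y * (1.5 - 0.5*a*y*y), which is exactly the pmul/pmadd
// pattern in the packet code. Scalar illustration (not Eigen code):
inline float refine_rsqrt(float a, float y) {
  float neg_half_a = -0.5f * a;            // matches pmul(_x, p16f_minus_half)
  return y * (neg_half_a * y * y + 1.5f);  // matches pmul(y, pmadd(neg_half, y*y, one_point_five))
}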
// Functions for rsqrt.
// Almost identical to the sqrt routine, just leave out the last multiplication
// and fill in NaN/Inf where needed. Note that this function only exists as an
// iterative version for doubles since there is no instruction for directly
// computing the reciprocal square root in AVX-512.
#ifdef EIGEN_FAST_MATH
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
prsqrt<Packet16f>(const Packet16f& _x) {
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
_EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
_EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
Packet16f neg_half = pmul(_x, p16f_minus_half);
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x),
_mm512_setzero_ps());
// Fill in NaNs and Infs for the negative/zero entries.
__mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
Packet16f infs_and_nans = _mm512_mask_blend_ps(
neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf),
p16f_nan);
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
// Insert NaNs and Infs in all the right places.
return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
prsqrt<Packet8d>(const Packet8d& _x) {
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(nan, 0x7ff1000000000000LL);
_EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
_EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
Packet8d neg_half = pmul(_x, p8d_minus_half);
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x),
_mm512_setzero_pd());
// Fill in NaNs and Infs for the negative/zero entries.
__mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
Packet8d infs_and_nans = _mm512_mask_blend_pd(
neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf),
p8d_nan);
// Do a first step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
// Do a second step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
// Insert NaNs and Infs in all the right places.
return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
}
#else
template <>
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
return _mm512_rsqrt28_ps(x);
}
#endif
#endif
} // end namespace internal
} // end namespace Eigen
#endif // THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
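// Minimal usage sketch of the specializations in this file (internal Eigen
// API, assuming an AVX-512 enabled build; illustrative only, not a supported
// public interface):
#include <Eigen/Core>
void sqrt16(const float* in, float* out) {  // pointers assumed 64-byte aligned
  using namespace Eigen::internal;
  Packet16f p = pload<Packet16f>(in);       // load 16 floats
  pstore(out, psqrt<Packet16f>(p));         // store the 16 square roots
}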

File diff suppressed because it is too large

View File

@@ -24,34 +24,43 @@ namespace internal {
// compile. Here, we manually specialize these functors for complex types when
// building for CUDA to avoid non-constexpr methods.
template<typename T> struct scalar_sum_op<std::complex<T>> {
// Sum
template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
typedef typename std::complex<T> result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
return std::complex<T>(numext::real(a) + numext::real(b),
numext::imag(a) + numext::imag(b));
}
};
template<typename T> struct scalar_difference_op<std::complex<T>> {
template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {};
// Difference
template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
typedef typename std::complex<T> result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
return std::complex<T>(numext::real(a) - numext::real(b),
numext::imag(a) - numext::imag(b));
}
};
template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T>> {
template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {};
// Product
template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
enum {
Vectorizable = packet_traits<std::complex<T>>::HasMul
};
typedef typename std::complex<T> result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
const T a_real = numext::real(a);
const T a_imag = numext::imag(a);
const T b_real = numext::real(b);
@@ -61,14 +70,18 @@ template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T>>
}
};
template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T>> {
template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {};
// Quotient
template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
enum {
Vectorizable = packet_traits<std::complex<T>>::HasDiv
};
typedef typename std::complex<T> result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
const T a_real = numext::real(a);
const T a_imag = numext::imag(a);
const T b_real = numext::real(b);
@@ -79,6 +92,8 @@ template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T>>
}
};
template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {};
#endif
} // end namespace internal
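// The functors above avoid std::complex member functions on the device by
// working on the real/imaginary parts via numext::real()/numext::imag(). The
// product body is truncated in the hunk above; it presumably reduces to the
// usual component-wise formula, sketched here for reference (not Eigen code):
#include <complex>
template <typename T>
std::complex<T> complex_product_sketch(const std::complex<T>& a, const std::complex<T>& b) {
  const T ar = a.real(), ai = a.imag();
  const T br = b.real(), bi = b.imag();
  return std::complex<T>(ar * br - ai * bi, ar * bi + ai * br);
}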

View File

@@ -15,7 +15,7 @@ namespace Eigen {
namespace internal {
// Most of the following operations require arch >= 3.0
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDACC__) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
template<> struct is_arithmetic<half2> { enum { value = true }; };
@@ -41,15 +41,15 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
template<> __device__ EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
return __half2half2(from);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
template<> __device__ EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
return *reinterpret_cast<const half2*>(from);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
template<> __device__ EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
return __halves2half2(from[0], from[1]);
}
@@ -57,17 +57,17 @@ template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const Eigen::half* from) {
return __halves2half2(from[0], from[0]);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
template<> __device__ EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
*reinterpret_cast<half2*>(to) = from;
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
template<> __device__ EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
to[0] = __low2half(from);
to[1] = __high2half(from);
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
#if __CUDA_ARCH__ >= 350
return __ldg((const half2*)from);
#else
@@ -76,7 +76,7 @@ template<>
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
#if __CUDA_ARCH__ >= 350
return __halves2half2(__ldg(from+0), __ldg(from+1));
#else
@@ -84,27 +84,27 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Ei
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
template<> __device__ EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
return __halves2half2(from[0*stride], from[1*stride]);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
template<> __device__ EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
to[stride*0] = __low2half(from);
to[stride*1] = __high2half(from);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) {
return __low2half(a);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
half2 result;
result.x = a.x & 0x7FFF7FFF;
return result;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
__device__ EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<half2,2>& kernel) {
__half a1 = __low2half(kernel.packet[0]);
__half a2 = __high2half(kernel.packet[0]);
@@ -114,7 +114,7 @@ ptranspose(PacketBlock<half2,2>& kernel) {
kernel.packet[1] = __halves2half2(a2, b2);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
#if __CUDA_ARCH__ >= 530
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
#else
@@ -123,7 +123,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen:
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hadd2(a, b);
#else
@@ -137,7 +137,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2&
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hsub2(a, b);
#else
@@ -151,7 +151,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2&
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hneg2(a);
#else
@@ -161,9 +161,9 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hmul2(a, b);
#else
@@ -177,7 +177,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2&
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
#if __CUDA_ARCH__ >= 530
return __hfma2(a, b, c);
#else
@@ -193,7 +193,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2&
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
template<> __device__ EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -203,7 +203,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2&
return __floats2half2_rn(r1, r2);
}
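// The pattern above (unpack both halves to float, compute, repack with
// __floats2half2_rn) is how every lanewise op without a native half2
// intrinsic is emulated in this file. Hypothetical example, not part of Eigen:
__device__ inline half2 plog2_sketch(const half2& a) {
  float a1 = __low2float(a);
  float a2 = __high2float(a);
  return __floats2half2_rn(log2f(a1), log2f(a2));  // per-lane log2 in float
}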
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
template<> __device__ EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -213,7 +213,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2&
return __halves2half2(r1, r2);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -223,7 +223,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2&
return __halves2half2(r1, r2);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hadd(__low2half(a), __high2half(a));
#else
@@ -233,7 +233,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<half2>(const
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
__half first = __low2half(a);
__half second = __high2half(a);
@@ -245,7 +245,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(c
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
__half first = __low2half(a);
__half second = __high2half(a);
@@ -257,7 +257,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(c
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hmul(__low2half(a), __high2half(a));
#else
@@ -267,7 +267,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(c
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = log1pf(a1);
@@ -277,29 +277,29 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
template<> __device__ EIGEN_STRONG_INLINE
half2 plog<half2>(const half2& a) {
return h2log(a);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
template<> __device__ EIGEN_STRONG_INLINE
half2 pexp<half2>(const half2& a) {
return h2exp(a);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
template<> __device__ EIGEN_STRONG_INLINE
half2 psqrt<half2>(const half2& a) {
return h2sqrt(a);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
template<> __device__ EIGEN_STRONG_INLINE
half2 prsqrt<half2>(const half2& a) {
return h2rsqrt(a);
}
#else
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = logf(a1);
@@ -307,7 +307,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog<half2>(const half2&
return __floats2half2_rn(r1, r2);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = expf(a1);
@@ -315,7 +315,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp<half2>(const half2&
return __floats2half2_rn(r1, r2);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = sqrtf(a1);
@@ -323,7 +323,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2&
return __floats2half2_rn(r1, r2);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) {
template<> __device__ EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = rsqrtf(a1);
@@ -333,6 +333,374 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2
#endif
#elif defined EIGEN_VECTORIZE_AVX512
typedef struct {
__m256i x;
} Packet16h;
template<> struct is_arithmetic<Packet16h> { enum { value = true }; };
template <>
struct packet_traits<half> : default_packet_traits {
typedef Packet16h type;
// There is no half-size packet for Packet16h.
typedef Packet16h half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 16,
HasHalfPacket = 0,
HasAdd = 0,
HasSub = 0,
HasMul = 0,
HasNegate = 0,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasConj = 0,
HasSetLinear = 0,
HasDiv = 0,
HasSqrt = 0,
HasRsqrt = 0,
HasExp = 0,
HasLog = 0,
HasBlend = 0
};
};
template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; enum {size=16, alignment=Aligned32}; typedef Packet16h half; };
template<> EIGEN_STRONG_INLINE Packet16h pset1<Packet16h>(const Eigen::half& from) {
Packet16h result;
result.x = _mm256_set1_epi16(from.x);
return result;
}
template<> EIGEN_STRONG_INLINE Eigen::half pfirst<Packet16h>(const Packet16h& from) {
return half_impl::raw_uint16_to_half(static_cast<unsigned short>(_mm256_extract_epi16(from.x, 0)));
}
template<> EIGEN_STRONG_INLINE Packet16h pload<Packet16h>(const Eigen::half* from) {
Packet16h result;
result.x = _mm256_load_si256(reinterpret_cast<const __m256i*>(from));
return result;
}
template<> EIGEN_STRONG_INLINE Packet16h ploadu<Packet16h>(const Eigen::half* from) {
Packet16h result;
result.x = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
return result;
}
template<> EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet16h& from) {
_mm256_store_si256((__m256i*)to, from.x);
}
template<> EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet16h& from) {
_mm256_storeu_si256((__m256i*)to, from.x);
}
template<> EIGEN_STRONG_INLINE Packet16h
ploadquad(const Eigen::half* from) {
Packet16h result;
unsigned short a = from[0].x;
unsigned short b = from[1].x;
unsigned short c = from[2].x;
unsigned short d = from[3].x;
result.x = _mm256_set_epi16(d, d, d, d, c, c, c, c, b, b, b, b, a, a, a, a);
return result;
}
EIGEN_STRONG_INLINE Packet16f half2float(const Packet16h& a) {
#ifdef EIGEN_HAS_FP16_C
return _mm512_cvtph_ps(a.x);
#else
EIGEN_ALIGN64 half aux[16];
pstore(aux, a);
float f0(aux[0]);
float f1(aux[1]);
float f2(aux[2]);
float f3(aux[3]);
float f4(aux[4]);
float f5(aux[5]);
float f6(aux[6]);
float f7(aux[7]);
float f8(aux[8]);
float f9(aux[9]);
float fa(aux[10]);
float fb(aux[11]);
float fc(aux[12]);
float fd(aux[13]);
float fe(aux[14]);
float ff(aux[15]);
return _mm512_set_ps(
ff, fe, fd, fc, fb, fa, f9, f8, f7, f6, f5, f4, f3, f2, f1, f0);
#endif
}
EIGEN_STRONG_INLINE Packet16h float2half(const Packet16f& a) {
#ifdef EIGEN_HAS_FP16_C
Packet16h result;
result.x = _mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);
return result;
#else
EIGEN_ALIGN64 float aux[16];
pstore(aux, a);
half h0(aux[0]);
half h1(aux[1]);
half h2(aux[2]);
half h3(aux[3]);
half h4(aux[4]);
half h5(aux[5]);
half h6(aux[6]);
half h7(aux[7]);
half h8(aux[8]);
half h9(aux[9]);
half ha(aux[10]);
half hb(aux[11]);
half hc(aux[12]);
half hd(aux[13]);
half he(aux[14]);
half hf(aux[15]);
Packet16h result;
result.x = _mm256_set_epi16(
hf.x, he.x, hd.x, hc.x, hb.x, ha.x, h9.x, h8.x,
h7.x, h6.x, h5.x, h4.x, h3.x, h2.x, h1.x, h0.x);
return result;
#endif
}
template<> EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
Packet16f af = half2float(a);
Packet16f bf = half2float(b);
Packet16f rf = padd(af, bf);
return float2half(rf);
}
template<> EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
Packet16f af = half2float(a);
Packet16f bf = half2float(b);
Packet16f rf = pmul(af, bf);
return float2half(rf);
}
template<> EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& from) {
Packet16f from_float = half2float(from);
return half(predux(from_float));
}
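// padd/pmul/predux above all round-trip through Packet16f via half2float and
// float2half. Any further Packet16h operation would follow the same pattern;
// for instance, a hypothetical subtraction (not in this snapshot;
// packet_traits<half> above advertises HasSub = 0):
template<> EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
  Packet16f af = half2float(a);
  Packet16f bf = half2float(b);
  return float2half(psub(af, bf));
}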
template<> EIGEN_STRONG_INLINE Packet16h pgather<Eigen::half, Packet16h>(const Eigen::half* from, Index stride)
{
Packet16h result;
result.x = _mm256_set_epi16(
from[15*stride].x, from[14*stride].x, from[13*stride].x, from[12*stride].x,
from[11*stride].x, from[10*stride].x, from[9*stride].x, from[8*stride].x,
from[7*stride].x, from[6*stride].x, from[5*stride].x, from[4*stride].x,
from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
return result;
}
template<> EIGEN_STRONG_INLINE void pscatter<half, Packet16h>(half* to, const Packet16h& from, Index stride)
{
EIGEN_ALIGN64 half aux[16];
pstore(aux, from);
to[stride*0].x = aux[0].x;
to[stride*1].x = aux[1].x;
to[stride*2].x = aux[2].x;
to[stride*3].x = aux[3].x;
to[stride*4].x = aux[4].x;
to[stride*5].x = aux[5].x;
to[stride*6].x = aux[6].x;
to[stride*7].x = aux[7].x;
to[stride*8].x = aux[8].x;
to[stride*9].x = aux[9].x;
to[stride*10].x = aux[10].x;
to[stride*11].x = aux[11].x;
to[stride*12].x = aux[12].x;
to[stride*13].x = aux[13].x;
to[stride*14].x = aux[14].x;
to[stride*15].x = aux[15].x;
}
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet16h,16>& kernel) {
__m256i a = kernel.packet[0].x;
__m256i b = kernel.packet[1].x;
__m256i c = kernel.packet[2].x;
__m256i d = kernel.packet[3].x;
__m256i e = kernel.packet[4].x;
__m256i f = kernel.packet[5].x;
__m256i g = kernel.packet[6].x;
__m256i h = kernel.packet[7].x;
__m256i i = kernel.packet[8].x;
__m256i j = kernel.packet[9].x;
__m256i k = kernel.packet[10].x;
__m256i l = kernel.packet[11].x;
__m256i m = kernel.packet[12].x;
__m256i n = kernel.packet[13].x;
__m256i o = kernel.packet[14].x;
__m256i p = kernel.packet[15].x;
__m256i ab_07 = _mm256_unpacklo_epi16(a, b);
__m256i cd_07 = _mm256_unpacklo_epi16(c, d);
__m256i ef_07 = _mm256_unpacklo_epi16(e, f);
__m256i gh_07 = _mm256_unpacklo_epi16(g, h);
__m256i ij_07 = _mm256_unpacklo_epi16(i, j);
__m256i kl_07 = _mm256_unpacklo_epi16(k, l);
__m256i mn_07 = _mm256_unpacklo_epi16(m, n);
__m256i op_07 = _mm256_unpacklo_epi16(o, p);
__m256i ab_8f = _mm256_unpackhi_epi16(a, b);
__m256i cd_8f = _mm256_unpackhi_epi16(c, d);
__m256i ef_8f = _mm256_unpackhi_epi16(e, f);
__m256i gh_8f = _mm256_unpackhi_epi16(g, h);
__m256i ij_8f = _mm256_unpackhi_epi16(i, j);
__m256i kl_8f = _mm256_unpackhi_epi16(k, l);
__m256i mn_8f = _mm256_unpackhi_epi16(m, n);
__m256i op_8f = _mm256_unpackhi_epi16(o, p);
__m256i abcd_03 = _mm256_unpacklo_epi32(ab_07, cd_07);
__m256i abcd_47 = _mm256_unpackhi_epi32(ab_07, cd_07);
__m256i efgh_03 = _mm256_unpacklo_epi32(ef_07, gh_07);
__m256i efgh_47 = _mm256_unpackhi_epi32(ef_07, gh_07);
__m256i ijkl_03 = _mm256_unpacklo_epi32(ij_07, kl_07);
__m256i ijkl_47 = _mm256_unpackhi_epi32(ij_07, kl_07);
__m256i mnop_03 = _mm256_unpacklo_epi32(mn_07, op_07);
__m256i mnop_47 = _mm256_unpackhi_epi32(mn_07, op_07);
__m256i abcd_8b = _mm256_unpacklo_epi32(ab_8f, cd_8f);
__m256i abcd_cf = _mm256_unpackhi_epi32(ab_8f, cd_8f);
__m256i efgh_8b = _mm256_unpacklo_epi32(ef_8f, gh_8f);
__m256i efgh_cf = _mm256_unpackhi_epi32(ef_8f, gh_8f);
__m256i ijkl_8b = _mm256_unpacklo_epi32(ij_8f, kl_8f);
__m256i ijkl_cf = _mm256_unpackhi_epi32(ij_8f, kl_8f);
__m256i mnop_8b = _mm256_unpacklo_epi32(mn_8f, op_8f);
__m256i mnop_cf = _mm256_unpackhi_epi32(mn_8f, op_8f);
__m256i abcdefgh_01 = _mm256_unpacklo_epi64(abcd_03, efgh_03);
__m256i abcdefgh_23 = _mm256_unpackhi_epi64(abcd_03, efgh_03);
__m256i ijklmnop_01 = _mm256_unpacklo_epi64(ijkl_03, mnop_03);
__m256i ijklmnop_23 = _mm256_unpackhi_epi64(ijkl_03, mnop_03);
__m256i abcdefgh_45 = _mm256_unpacklo_epi64(abcd_47, efgh_47);
__m256i abcdefgh_67 = _mm256_unpackhi_epi64(abcd_47, efgh_47);
__m256i ijklmnop_45 = _mm256_unpacklo_epi64(ijkl_47, mnop_47);
__m256i ijklmnop_67 = _mm256_unpackhi_epi64(ijkl_47, mnop_47);
__m256i abcdefgh_89 = _mm256_unpacklo_epi64(abcd_8b, efgh_8b);
__m256i abcdefgh_ab = _mm256_unpackhi_epi64(abcd_8b, efgh_8b);
__m256i ijklmnop_89 = _mm256_unpacklo_epi64(ijkl_8b, mnop_8b);
__m256i ijklmnop_ab = _mm256_unpackhi_epi64(ijkl_8b, mnop_8b);
__m256i abcdefgh_cd = _mm256_unpacklo_epi64(abcd_cf, efgh_cf);
__m256i abcdefgh_ef = _mm256_unpackhi_epi64(abcd_cf, efgh_cf);
__m256i ijklmnop_cd = _mm256_unpacklo_epi64(ijkl_cf, mnop_cf);
__m256i ijklmnop_ef = _mm256_unpackhi_epi64(ijkl_cf, mnop_cf);
// NOTE: no unpacklo/hi instr in this case, so using permute instr.
__m256i a_p_0 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x20);
__m256i a_p_1 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x31);
__m256i a_p_2 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x20);
__m256i a_p_3 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x31);
__m256i a_p_4 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x20);
__m256i a_p_5 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x31);
__m256i a_p_6 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x20);
__m256i a_p_7 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x31);
__m256i a_p_8 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x20);
__m256i a_p_9 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x31);
__m256i a_p_a = _mm256_permute2x128_si256(abcdefgh_ab, ijklmnop_ab, 0x20);
__m256i a_p_b = _mm256_permute2x128_si256(abcdefgh_ab, ijklmnop_ab, 0x31);
__m256i a_p_c = _mm256_permute2x128_si256(abcdefgh_cd, ijklmnop_cd, 0x20);
__m256i a_p_d = _mm256_permute2x128_si256(abcdefgh_cd, ijklmnop_cd, 0x31);
__m256i a_p_e = _mm256_permute2x128_si256(abcdefgh_ef, ijklmnop_ef, 0x20);
__m256i a_p_f = _mm256_permute2x128_si256(abcdefgh_ef, ijklmnop_ef, 0x31);
kernel.packet[0].x = a_p_0;
kernel.packet[1].x = a_p_1;
kernel.packet[2].x = a_p_2;
kernel.packet[3].x = a_p_3;
kernel.packet[4].x = a_p_4;
kernel.packet[5].x = a_p_5;
kernel.packet[6].x = a_p_6;
kernel.packet[7].x = a_p_7;
kernel.packet[8].x = a_p_8;
kernel.packet[9].x = a_p_9;
kernel.packet[10].x = a_p_a;
kernel.packet[11].x = a_p_b;
kernel.packet[12].x = a_p_c;
kernel.packet[13].x = a_p_d;
kernel.packet[14].x = a_p_e;
kernel.packet[15].x = a_p_f;
}
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet16h,8>& kernel) {
EIGEN_ALIGN64 half in[8][16];
pstore<half>(in[0], kernel.packet[0]);
pstore<half>(in[1], kernel.packet[1]);
pstore<half>(in[2], kernel.packet[2]);
pstore<half>(in[3], kernel.packet[3]);
pstore<half>(in[4], kernel.packet[4]);
pstore<half>(in[5], kernel.packet[5]);
pstore<half>(in[6], kernel.packet[6]);
pstore<half>(in[7], kernel.packet[7]);
EIGEN_ALIGN64 half out[8][16];
for (int i = 0; i < 8; ++i) {
for (int j = 0; j < 8; ++j) {
out[i][j] = in[j][2*i];
}
for (int j = 0; j < 8; ++j) {
out[i][j+8] = in[j][2*i+1];
}
}
kernel.packet[0] = pload<Packet16h>(out[0]);
kernel.packet[1] = pload<Packet16h>(out[1]);
kernel.packet[2] = pload<Packet16h>(out[2]);
kernel.packet[3] = pload<Packet16h>(out[3]);
kernel.packet[4] = pload<Packet16h>(out[4]);
kernel.packet[5] = pload<Packet16h>(out[5]);
kernel.packet[6] = pload<Packet16h>(out[6]);
kernel.packet[7] = pload<Packet16h>(out[7]);
}
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet16h,4>& kernel) {
EIGEN_ALIGN64 half in[4][16];
pstore<half>(in[0], kernel.packet[0]);
pstore<half>(in[1], kernel.packet[1]);
pstore<half>(in[2], kernel.packet[2]);
pstore<half>(in[3], kernel.packet[3]);
EIGEN_ALIGN64 half out[4][16];
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
out[i][j] = in[j][4*i];
}
for (int j = 0; j < 4; ++j) {
out[i][j+4] = in[j][4*i+1];
}
for (int j = 0; j < 4; ++j) {
out[i][j+8] = in[j][4*i+2];
}
for (int j = 0; j < 4; ++j) {
out[i][j+12] = in[j][4*i+3];
}
}
kernel.packet[0] = pload<Packet16h>(out[0]);
kernel.packet[1] = pload<Packet16h>(out[1]);
kernel.packet[2] = pload<Packet16h>(out[2]);
kernel.packet[3] = pload<Packet16h>(out[3]);
}
#elif defined EIGEN_VECTORIZE_AVX
typedef struct {
@@ -492,6 +860,30 @@ template<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet8h>(Eigen::half*
to[stride*7].x = aux[7].x;
}
template<> EIGEN_STRONG_INLINE Eigen::half predux<Packet8h>(const Packet8h& a) {
Packet8f af = half2float(a);
float reduced = predux<Packet8f>(af);
return Eigen::half(reduced);
}
template<> EIGEN_STRONG_INLINE Eigen::half predux_max<Packet8h>(const Packet8h& a) {
Packet8f af = half2float(a);
float reduced = predux_max<Packet8f>(af);
return Eigen::half(reduced);
}
template<> EIGEN_STRONG_INLINE Eigen::half predux_min<Packet8h>(const Packet8h& a) {
Packet8f af = half2float(a);
float reduced = predux_min<Packet8f>(af);
return Eigen::half(reduced);
}
template<> EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8h>(const Packet8h& a) {
Packet8f af = half2float(a);
float reduced = predux_mul<Packet8f>(af);
return Eigen::half(reduced);
}
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet8h,8>& kernel) {
__m128i a = kernel.packet[0].x;

View File

@@ -100,6 +100,33 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(cons
return __floats2half2_rn(a.x, a.y);
}
#elif defined EIGEN_VECTORIZE_AVX512
template <>
struct type_casting_traits<half, float> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
return half2float(a);
}
template <>
struct type_casting_traits<float, half> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
return float2half(a);
}
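// Illustrative direct use of the casts above (internal Eigen API, assumes an
// AVX-512 build; this is the kind of call the vectorized cast evaluator
// issues through type_casting_traits):
void widen16(const Eigen::half* in, float* out) {
  using namespace Eigen::internal;
  Packet16h hp = ploadu<Packet16h>(in);           // 16 half values
  pstoreu(out, pcast<Packet16h, Packet16f>(hp));  // 16 floats
}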
#elif defined EIGEN_VECTORIZE_AVX
template <>

View File

@@ -16,8 +16,14 @@ namespace Eigen {
namespace internal {
inline uint32x4_t p4ui_CONJ_XOR() {
// See bug 1325, clang fails to call vld1q_u64.
#if EIGEN_COMP_CLANG
uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
return ret;
#else
static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
return vld1q_u32( conj_XOR_DATA );
#endif
}
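// The mask above has the sign bit (0x80000000) set only in the imaginary
// lanes, so XOR-ing a packet holding two std::complex<float> with it flips
// the sign of both imaginary parts, i.e. conjugates both elements. Standalone
// sketch of that idea (assumes <arm_neon.h>; not copied from this file):
inline float32x4_t conj_sketch(float32x4_t v, uint32x4_t conj_xor) {
  return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v), conj_xor));
}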
inline uint32x2_t p2ui_CONJ_XOR() {
@@ -282,8 +288,13 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
//---------- double ----------
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
// See bug 1325, clang fails to call vld1q_u64.
#if EIGEN_COMP_CLANG
static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
#else
const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
#endif
struct Packet1cd
{

View File

@@ -476,6 +476,26 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, co
return Packet2cf(_mm_castpd_ps(result));
}
template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
{
return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
}
template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
{
return pset1<Packet1cd>(b);
}
template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
{
return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
}
template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
{
return pset1<Packet1cd>(b);
}
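// Illustrative use of the insertions above (internal Eigen API): replace the
// first and the last complex element of a Packet2cf.
void set_complex_ends(Eigen::internal::Packet2cf& p, std::complex<float> lo, std::complex<float> hi) {
  using namespace Eigen::internal;
  p = pinsertfirst(p, lo);  // element 0 becomes lo
  p = pinsertlast(p, hi);   // element 1 becomes hi
}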
} // end namespace internal
} // end namespace Eigen

View File

@@ -32,7 +32,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
/* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);//-1.f/0.f);
/* natural logarithm computed for 4 simultaneous floats;
returns NaN for x <= 0
*/
@@ -444,25 +444,33 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
#if EIGEN_FAST_MATH
// This is based on Quake3's fast inverse square root.
// Functions for sqrt.
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
// exact solution. It does not handle +inf or denormalized numbers correctly.
// The main advantage of this approach is not just speed, but also the fact that
// it can be inlined and pipelined with other computations, further reducing its
// effective latency. This is similar to Quake3's fast inverse square root.
// For detail see here: http://www.beyond3d.com/content/articles/8/
// It lacks 1 (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f psqrt<Packet4f>(const Packet4f& _x)
{
Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
Packet4f denormal_mask = _mm_and_ps(
_mm_cmpge_ps(_x, _mm_setzero_ps()),
_mm_cmplt_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
/* select only the inverse sqrt of non-zero inputs */
Packet4f non_zero_mask = _mm_cmpge_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)()));
Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
// Compute approximate reciprocal sqrt.
Packet4f x = _mm_rsqrt_ps(_x);
// Do a single step of Newton's iteration.
x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
return pmul(_x,x);
// Flush results for denormals to zero.
return _mm_andnot_ps(denormal_mask, pmul(_x,x));
}
#else
template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
#endif
@@ -491,7 +499,7 @@ Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
_mm_and_ps(zero_mask, p4f_inf));
_mm_and_ps(zero_mask, p4f_inf));
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));

View File

@@ -818,6 +818,44 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_blend_ps(a,pset1<Packet4f>(b),1);
#else
return _mm_move_ss(a, _mm_load_ss(&b));
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_blend_pd(a,pset1<Packet2d>(b),1);
#else
return _mm_move_sd(a, _mm_load_sd(&b));
#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
#else
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
#else
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
#endif
}
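// In the SSE4.1 branches above the blend immediate picks which lane comes
// from the broadcast value: bit 0 for pinsertfirst, bit 3 (1<<3) for the last
// float lane, bit 1 (1<<1) for the last double lane. The pre-SSE4.1 fallback
// is the generic select-by-mask idiom, out = (a & ~mask) | (b & mask); scalar
// sketch of that selection on a 4-lane array (illustration only):
inline void insert_last_sketch(float out[4], const float a[4], float b) {
  for (int i = 0; i < 4; ++i) out[i] = (i == 3) ? b : a[i];
}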
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {

View File

@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -24,13 +25,48 @@ struct Packet1cd
Packet2d v;
};
struct Packet2cf
{
EIGEN_STRONG_INLINE Packet2cf() {}
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
union {
Packet4f v;
Packet1cd cd[2];
};
};
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
typedef Packet2cf type;
typedef Packet2cf half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasBlend = 1,
HasSetLinear = 0
};
};
template<> struct packet_traits<std::complex<double> > : default_packet_traits
{
typedef Packet1cd type;
typedef Packet1cd half;
enum {
Vectorizable = 1,
AlignedOnScalar = 0,
AlignedOnScalar = 1,
size = 1,
HasHalfPacket = 0,
@@ -47,20 +83,68 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
/* Forward declaration */
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
Packet2cf res;
res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
res.cd[1] = res.cd[0];
return res;
}
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
{
std::complex<float> EIGEN_ALIGN16 af[2];
af[0] = from[0*stride];
af[1] = from[1*stride];
return pload<Packet2cf>(af);
}
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
{
return pload<Packet1cd>(from);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
{
std::complex<float> EIGEN_ALIGN16 af[2];
pstore<std::complex<float> >((std::complex<float> *) af, from);
to[0*stride] = af[0];
to[1*stride] = af[1];
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
{
pstore<std::complex<double> >(to, from);
}
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
Packet2cf res;
res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
return res;
}
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
@@ -79,43 +163,90 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
return Packet1cd(v1 + v2);
}
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
return pset1<Packet1cd>(*from);
Packet2cf res;
res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
return res;
}
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{
std::complex<double> EIGEN_ALIGN16 res[2];
pstore<std::complex<double> >(res, a);
std::complex<double> EIGEN_ALIGN16 res;
pstore<std::complex<double> >(&res, a);
return res;
}
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
std::complex<float> EIGEN_ALIGN16 res[2];
pstore<std::complex<float> >(res, a);
return res[0];
}
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
{
Packet2cf res;
res.cd[0] = a.cd[1];
res.cd[1] = a.cd[0];
return res;
}
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
{
return pfirst(a);
}
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{
std::complex<float> res;
Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
vec_st2f(b.v, (float*)&res);
return res;
}
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
{
return vecs[0];
}
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
PacketBlock<Packet2cf,2> transpose;
transpose.packet[0] = vecs[0];
transpose.packet[1] = vecs[1];
ptranspose(transpose);
return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
}
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
{
return pfirst(a);
}
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
std::complex<float> res;
Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
vec_st2f(b.v, (float*)&res);
return res;
}
template<int Offset>
struct palign_impl<Offset,Packet1cd>
@@ -127,6 +258,18 @@ struct palign_impl<Offset,Packet1cd>
}
};
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
{
if (Offset == 1) {
first.cd[0] = first.cd[1];
first.cd[1] = second.cd[0];
}
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
@@ -160,6 +303,39 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return pconj(internal::pmul(a, b));
}
};
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for AltiVec
@@ -168,17 +344,49 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
}
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for AltiVec
Packet2cf res;
res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
return res;
}
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
{
return Packet1cd(preverse(Packet2d(x.v)));
}
EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
{
Packet2cf res;
res.cd[0] = pcplxflip(x.cd[0]);
res.cd[1] = pcplxflip(x.cd[1]);
return res;
}
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
{
Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
kernel.packet[0].v = tmp;
}
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
{
Packet1cd tmp = kernel.packet[0].cd[1];
kernel.packet[0].cd[1] = kernel.packet[1].cd[0];
kernel.packet[1].cd[0] = tmp;
}
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
Packet2cf result;
const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] };
result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
return result;
}
} // end namespace internal
} // end namespace Eigen

View File

@@ -3,6 +3,7 @@
//
// Copyright (C) 2007 Julien Pommier
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -19,32 +20,32 @@ namespace Eigen {
namespace internal {
static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d pexp<Packet2d>(const Packet2d& _x)
{
Packet2d x = _x;
_EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
_EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
_EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
_EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
_EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
_EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
Packet2d tmp, fx;
Packet2l emm0;
@@ -91,18 +92,44 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
isnumber_mask);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f pexp<Packet4f>(const Packet4f& x)
{
Packet4f res;
res.v4f[0] = pexp<Packet2d>(x.v4f[0]);
res.v4f[1] = pexp<Packet2d>(x.v4f[1]);
return res;
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d psqrt<Packet2d>(const Packet2d& x)
{
return __builtin_s390_vfsqdb(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f psqrt<Packet4f>(const Packet4f& x)
{
Packet4f res;
res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
return res;
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d prsqrt<Packet2d>(const Packet2d& x) {
// Unfortunately we can't use the much faster mm_rsqrt_pd since it only provides an approximation.
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f prsqrt<Packet4f>(const Packet4f& x) {
Packet4f res;
res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
return res;
}
} // end namespace internal
} // end namespace Eigen

View File

@@ -28,9 +28,8 @@ namespace internal {
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
#endif
typedef __vector int Packet4i;
@@ -42,6 +41,10 @@ typedef __vector double Packet2d;
typedef __vector unsigned long long Packet2ul;
typedef __vector long long Packet2l;
typedef struct {
Packet2d v4f[2];
} Packet4f;
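A hedged illustration of what this emulation means in practice: each Packet4f carries its four floats as two double-precision Packet2d halves, presumably because the targeted vector facility only provides double-precision lanes, so float arithmetic is done in double and converted on load/store.
// Illustrative only: round-trip through the emulated Packet4f.
float EIGEN_ALIGN16 data[4] = { 1.f, 2.f, 3.f, 4.f };
Packet4f p = pload<Packet4f>(data);   // p.v4f[0] holds {1.0, 2.0}, p.v4f[1] holds {3.0, 4.0} as doubles
pstore<float>(data, p);               // each Packet2d half is converted back down to two floats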
typedef union {
int32_t i[4];
uint32_t ui[4];
@@ -88,6 +91,7 @@ static Packet2d p2d_ONE = { 1.0, 1.0 };
static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
@@ -132,13 +136,11 @@ template<> struct packet_traits<int> : default_packet_traits
typedef Packet4i type;
typedef Packet4i half;
enum {
// FIXME check the Has*
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
HasHalfPacket = 0,
// FIXME check the Has*
HasAdd = 1,
HasSub = 1,
HasMul = 1,
@@ -147,6 +149,37 @@ template<> struct packet_traits<int> : default_packet_traits
};
};
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
typedef Packet4f half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasMin = 1,
HasMax = 1,
HasAbs = 1,
HasSin = 0,
HasCos = 0,
HasLog = 0,
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasRound = 1,
HasFloor = 1,
HasCeil = 1,
HasNegate = 1,
HasBlend = 1
};
};
template<> struct packet_traits<double> : default_packet_traits
{
typedef Packet2d type;
@@ -157,7 +190,6 @@ template<> struct packet_traits<double> : default_packet_traits
size=2,
HasHalfPacket = 1,
// FIXME check the Has*
HasAdd = 1,
HasSub = 1,
HasMul = 1,
@@ -180,8 +212,12 @@ template<> struct packet_traits<double> : default_packet_traits
};
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
/* Forward declaration */
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
{
Packet vt;
@@ -222,6 +258,32 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
return s;
}
/* Helper function to simulate a vec_splat_packet4f
*/
template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from)
{
Packet4f splat;
switch (element) {
case 0:
splat.v4f[0] = vec_splat(from.v4f[0], 0);
splat.v4f[1] = splat.v4f[0];
break;
case 1:
splat.v4f[0] = vec_splat(from.v4f[0], 1);
splat.v4f[1] = splat.v4f[0];
break;
case 2:
splat.v4f[0] = vec_splat(from.v4f[1], 0);
splat.v4f[1] = splat.v4f[0];
break;
case 3:
splat.v4f[0] = vec_splat(from.v4f[1], 1);
splat.v4f[1] = splat.v4f[0];
break;
}
return splat;
}
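A hedged usage sketch: element indices 0-1 are taken from v4f[0] and indices 2-3 from v4f[1], so for example:
// Illustrative only: splat element 2 of {1, 2, 3, 4}.
float EIGEN_ALIGN16 in[4]  = { 1.f, 2.f, 3.f, 4.f };
float EIGEN_ALIGN16 out[4];
pstore<float>(out, vec_splat_packet4f<2>(pload<Packet4f>(in)));
// out is now {3.f, 3.f, 3.f, 3.f}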
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
@@ -238,6 +300,31 @@ struct palign_impl<Offset,Packet4i>
}
};
/* This is a tricky one, we have to translate float alignment to vector elements of sizeof double
*/
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
{
switch (Offset % 4) {
case 1:
first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
break;
case 2:
first.v4f[0] = first.v4f[1];
first.v4f[1] = second.v4f[0];
break;
case 3:
first.v4f[0] = vec_sld(first.v4f[1], second.v4f[0], 8);
first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
break;
}
}
};
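Illustratively, palign shifts the concatenation [first|second] left by Offset float elements; since each emulated float lane occupies one double, a float offset of 1 corresponds to a vec_sld by 8 bytes. A hedged sketch:
// Illustrative only:
float EIGEN_ALIGN16 f[4] = { 0.f, 1.f, 2.f, 3.f };
float EIGEN_ALIGN16 s[4] = { 4.f, 5.f, 6.f, 7.f };
float EIGEN_ALIGN16 out[4];
Packet4f first = pload<Packet4f>(f), second = pload<Packet4f>(s);
palign_impl<1, Packet4f>::run(first, second);
pstore<float>(out, first);   // out is now {1,2,3,4}; Offset=2 would give {2,3,4,5}, Offset=3 gives {3,4,5,6}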
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
@@ -257,6 +344,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
return vfrom->v4i;
}
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_LOAD
Packet4f vfrom;
vfrom.v4f[0] = vec_ld2f(&from[0]);
vfrom.v4f[1] = vec_ld2f(&from[2]);
return vfrom;
}
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
{
// FIXME: No intrinsic yet
@@ -275,6 +372,15 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
vto->v4i = from;
}
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_STORE
vec_st2f(from.v4f[0], &to[0]);
vec_st2f(from.v4f[1], &to[2]);
}
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
{
// FIXME: No intrinsic yet
@@ -288,10 +394,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
{
return vec_splats(from);
}
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
return vec_splats(from);
}
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
{
Packet4f to;
to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
to.v4f[1] = to.v4f[0];
return to;
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4i>(const int *a,
@@ -304,6 +416,17 @@ pbroadcast4<Packet4i>(const int *a,
a3 = vec_splat(a3, 3);
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4f>(const float *a,
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
{
a3 = pload<Packet4f>(a);
a0 = vec_splat_packet4f<0>(a3);
a1 = vec_splat_packet4f<1>(a3);
a2 = vec_splat_packet4f<2>(a3);
a3 = vec_splat_packet4f<3>(a3);
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet2d>(const double *a,
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
@@ -326,6 +449,16 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f
return pload<Packet4i>(ai);
}
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
{
float EIGEN_ALIGN16 ai[4];
ai[0] = from[0*stride];
ai[1] = from[1*stride];
ai[2] = from[2*stride];
ai[3] = from[3*stride];
return pload<Packet4f>(ai);
}
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
{
double EIGEN_ALIGN16 af[2];
@@ -344,6 +477,16 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
to[3*stride] = ai[3];
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
{
float EIGEN_ALIGN16 ai[4];
pstore<float>((float *)ai, from);
to[0*stride] = ai[0];
to[1*stride] = ai[1];
to[2*stride] = ai[2];
to[3*stride] = ai[3];
}
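A hedged illustration of the strided access these two routines implement:
// Illustrative only: gather/scatter every third float of a buffer.
float buf[12] = { 0.f };
Packet4f p = pgather<float, Packet4f>(buf, 3);   // reads buf[0], buf[3], buf[6], buf[9]
pscatter<float, Packet4f>(buf, p, 3);            // writes the four lanes back to the same strided slots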
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
{
double EIGEN_ALIGN16 af[2];
@@ -353,52 +496,160 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to,
}
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f c;
c.v4f[0] = a.v4f[0] + b.v4f[0];
c.v4f[1] = a.v4f[1] + b.v4f[1];
return c;
}
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f c;
c.v4f[0] = a.v4f[0] - b.v4f[0];
c.v4f[1] = a.v4f[1] - b.v4f[1];
return c;
}
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f c;
c.v4f[0] = a.v4f[0] * b.v4f[0];
c.v4f[1] = a.v4f[1] * b.v4f[1];
return c;
}
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f c;
c.v4f[0] = a.v4f[0] / b.v4f[0];
c.v4f[1] = a.v4f[1] / b.v4f[1];
return c;
}
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
{
Packet4f c;
c.v4f[0] = -a.v4f[0];
c.v4f[1] = -a.v4f[1];
return c;
}
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
{
Packet4f res;
res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f res;
res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f res;
res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f res;
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f res;
res.v4f[0] = por(a.v4f[0], b.v4f[0]);
res.v4f[1] = por(a.v4f[1], b.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f res;
res.v4f[0] = pxor(a.v4f[0], b.v4f[0]);
res.v4f[1] = pxor(a.v4f[1], b.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f res;
res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
{
Packet4f res;
res.v4f[0] = vec_round(a.v4f[0]);
res.v4f[1] = vec_round(a.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
{
Packet4f res;
res.v4f[0] = vec_ceil(a.v4f[0]);
res.v4f[1] = vec_ceil(a.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
{
Packet4f res;
res.v4f[0] = vec_floor(a.v4f[0]);
res.v4f[1] = vec_floor(a.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { return pload<Packet4f>(from); }
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
@@ -408,6 +659,14 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
}
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
Packet4f p = pload<Packet4f>(from);
p.v4f[1] = vec_splat(p.v4f[0], 1);
p.v4f[0] = vec_splat(p.v4f[0], 0);
return p;
}
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
{
Packet2d p = pload<Packet2d>(from);
@@ -415,12 +674,15 @@ template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
}
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
@@ -433,8 +695,23 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
}
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
{
Packet4f rev;
rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
return rev;
}
template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
{
Packet4f res;
res.v4f[0] = pabs(a.v4f[0]);
res.v4f[1] = pabs(a.v4f[1]);
return res;
}
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
@@ -453,6 +730,13 @@ template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
sum = padd<Packet2d>(a, b);
return pfirst(sum);
}
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{
Packet2d sum;
sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
double first = predux<Packet2d>(sum);
return static_cast<float>(first);
}
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
{
@@ -493,6 +777,21 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
return sum;
}
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{
PacketBlock<Packet4f,4> transpose;
transpose.packet[0] = vecs[0];
transpose.packet[1] = vecs[1];
transpose.packet[2] = vecs[2];
transpose.packet[3] = vecs[3];
ptranspose(transpose);
Packet4f sum = padd(transpose.packet[0], transpose.packet[1]);
sum = padd(sum, transpose.packet[2]);
sum = padd(sum, transpose.packet[3]);
return sum;
}
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
@@ -507,6 +806,12 @@ template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
}
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{
// Return predux_mul<Packet2d> of the subvectors product
return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
}
// min
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{
@@ -521,6 +826,14 @@ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
}
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{
Packet2d b, res;
b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
return static_cast<float>(pfirst(res));
}
// max
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{
@@ -536,6 +849,14 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
}
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{
Packet2d b, res;
b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
return static_cast<float>(pfirst(res));
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4i,4>& kernel) {
Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
@@ -556,12 +877,61 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
kernel.packet[1] = t1;
}
/* Split the Packet4f PacketBlock into 4 Packet2d PacketBlocks and transpose each one
*/
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
PacketBlock<Packet2d,2> t0,t1,t2,t3;
// copy top-left 2x2 Packet2d block
t0.packet[0] = kernel.packet[0].v4f[0];
t0.packet[1] = kernel.packet[1].v4f[0];
// copy top-right 2x2 Packet2d block
t1.packet[0] = kernel.packet[0].v4f[1];
t1.packet[1] = kernel.packet[1].v4f[1];
// copy bottom-left 2x2 Packet2d block
t2.packet[0] = kernel.packet[2].v4f[0];
t2.packet[1] = kernel.packet[3].v4f[0];
// copy bottom-right 2x2 Packet2d block
t3.packet[0] = kernel.packet[2].v4f[1];
t3.packet[1] = kernel.packet[3].v4f[1];
// Transpose all 2x2 blocks
ptranspose(t0);
ptranspose(t1);
ptranspose(t2);
ptranspose(t3);
// Copy back transposed blocks, but exchange t1 and t2 due to transposition
kernel.packet[0].v4f[0] = t0.packet[0];
kernel.packet[0].v4f[1] = t2.packet[0];
kernel.packet[1].v4f[0] = t0.packet[1];
kernel.packet[1].v4f[1] = t2.packet[1];
kernel.packet[2].v4f[0] = t1.packet[0];
kernel.packet[2].v4f[1] = t3.packet[0];
kernel.packet[3].v4f[0] = t1.packet[1];
kernel.packet[3].v4f[1] = t3.packet[1];
}
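A hedged check of the block scheme (the t1/t2 swap is what moves original columns into the result rows):
// Illustrative only: after ptranspose, packet r holds the original column r.
float EIGEN_ALIGN16 m[16] = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };
PacketBlock<Packet4f,4> K;
for (int r = 0; r < 4; ++r) K.packet[r] = pload<Packet4f>(&m[4*r]);
ptranspose(K);
float EIGEN_ALIGN16 row0[4];
pstore<float>(row0, K.packet[0]);   // row0 is now {0.f, 4.f, 8.f, 12.f}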
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
return vec_sel(elsePacket, thenPacket, mask);
}
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
Packet4f result;
result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);
result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
return result;
}
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));

View File

@@ -266,7 +266,7 @@ struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
{
using std::sqrt;
EIGEN_USING_STD_MATH(sqrt)
Scalar p, qp;
if(_x>_y)
{

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -37,87 +37,78 @@ template<typename Scalar>
struct functor_traits<scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
template <typename Scalar, typename Packet, bool RandomAccess, bool IsInteger> struct linspaced_op_impl;
template <typename Scalar, typename Packet, bool IsInteger> struct linspaced_op_impl;
// linear access for packet ops:
// 1) initialization
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
// base += [size*step, ..., size*step]
//
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
// in order to avoid the padd() in operator() ?
template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
{
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
m_interPacket(plset<Packet>(0)),
m_flip(numext::abs(high)<numext::abs(low))
{}
template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const
{
m_base = padd(m_base, pset1<Packet>(m_step));
return m_low+Scalar(i)*m_step;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
if(m_flip)
return (i==0)? m_low : (m_high - (m_size1-i)*m_step);
else
return (i==m_size1)? m_high : (m_low + i*m_step);
}
template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType) const { return m_base = padd(m_base,m_packetStep); }
const Scalar m_low;
const Scalar m_step;
const Packet m_packetStep;
mutable Packet m_base;
};
// random access for packet ops:
// 1) each step
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
{
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return m_low+i*m_step; }
template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
{ return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
{
// Principle:
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
if(m_flip)
{
Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket);
Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
if(i==0)
res = pinsertfirst(res, m_low);
return res;
}
else
{
Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket);
Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
if(i==m_size1-unpacket_traits<Packet>::size+1)
res = pinsertlast(res, m_high);
return res;
}
}
const Scalar m_low;
const Scalar m_high;
const Index m_size1;
const Scalar m_step;
const Packet m_lowPacket;
const Packet m_stepPacket;
const Packet m_interPacket;
const bool m_flip;
};
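A hedged numeric reading of the non-flipped branch, assuming a 4-lane packet and LinSpaced(5, 0, 1), i.e. m_step = 0.25 and m_size1 = 4:
// Illustrative only:
//   operator()(i)  -> 0, 0.25, 0.5, 0.75 for i = 0..3, and exactly m_high = 1 for i == m_size1
//   packetOp(0)    -> {0, 0.25, 0.5, 0.75}
//   packetOp(1)    -> {0.25, 0.5, 0.75, 1}, the last lane pinned to m_high via pinsertlast
//                     because i == m_size1 - packet_size + 1; the flipped branch pins m_low instead.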
template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/true>
{
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_length(high-low), m_divisor(convert_index<Scalar>(num_steps==1?1:num_steps-1)), m_interPacket(plset<Packet>(0))
m_low(low),
m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
m_divisor(convert_index<Scalar>(num_steps+high-low)/(high-low+1)),
m_use_divisor((high+1)<(low+num_steps))
{}
template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Scalar operator() (IndexType i) const {
return m_low + (m_length*Scalar(i))/m_divisor;
const Scalar operator() (IndexType i) const
{
if(m_use_divisor) return m_low + convert_index<Scalar>(i)/m_divisor;
else return m_low + convert_index<Scalar>(i)*m_multiplier;
}
template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Packet packetOp(IndexType i) const {
return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
pset1<Packet>(m_divisor))); }
const Scalar m_low;
const Scalar m_length;
const Scalar m_divisor;
const Packet m_interPacket;
const Scalar m_multiplier;
const Scalar m_divisor;
const bool m_use_divisor;
};
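A hedged numeric reading of the integer branch: when there are more steps than distinct values, each value must repeat and the index is divided; otherwise it is multiplied by the integer step:
// Illustrative only:
//   LinSpaced(11, 0, 50): m_use_divisor = false, m_multiplier = 5 -> 0, 5, 10, ..., 50
//   LinSpaced(10, 0, 4) : m_use_divisor = true,  m_divisor    = 2 -> 0, 0, 1, 1, 2, 2, 3, 3, 4, 4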
// ----- Linspace functor ----------------------------------------------------------------
@@ -125,18 +116,18 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
// Forward declaration (we default to random access which does not really give
// us a speed gain when using packet access but it allows to use the functor in
// nested expressions).
template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op;
template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> >
template <typename Scalar, typename PacketType> struct linspaced_op;
template <typename Scalar, typename PacketType> struct functor_traits< linspaced_op<Scalar,PacketType> >
{
enum
{
Cost = 1,
PacketAccess = packet_traits<Scalar>::HasSetLinear
&& ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),
PacketAccess = (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear && packet_traits<Scalar>::HasBlend,
/*&& ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),*/ // <- vectorization for integer is currently disabled
IsRepeatable = true
};
};
template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op
template <typename Scalar, typename PacketType> struct linspaced_op
{
linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
: impl((num_steps==1 ? high : low),high,num_steps)
@@ -148,20 +139,49 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
template<typename Packet,typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
// This proxy object handles the actual required temporaries, the different
// implementations (random vs. sequential access) as well as the
// correct piping to size 2/4 packet operations.
// As long as we don't have a Bresenham-like implementation for linear-access and integer types,
// we have to by-pass RandomAccess for integer types. See bug 698.
const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl;
// This proxy object handles the actual required temporaries and the different
// implementations (integer vs. floating point).
const linspaced_op_impl<Scalar,PacketType,NumTraits<Scalar>::IsInteger> impl;
};
// Linear access is automatically determined from the operator() prototypes available for the given functor.
// If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently
// and linear access is not possible. In all other cases, linear access is enabled.
// Users should not have to deal with this struture.
// Users should not have to deal with this structure.
template<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };
// For unreliable compilers, let's specialize the has_*ary_operator
// helpers so that at least built-in nullary functors work fine.
#if !( (EIGEN_COMP_MSVC>1600) || (EIGEN_GNUC_AT_LEAST(4,8)) || (EIGEN_COMP_ICC>=1600))
template<typename Scalar,typename IndexType>
struct has_nullary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 1}; };
template<typename Scalar,typename IndexType>
struct has_unary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
template<typename Scalar,typename IndexType>
struct has_binary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
template<typename Scalar,typename IndexType>
struct has_nullary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
template<typename Scalar,typename IndexType>
struct has_unary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
template<typename Scalar,typename IndexType>
struct has_binary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 1}; };
template<typename Scalar, typename PacketType,typename IndexType>
struct has_nullary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
template<typename Scalar, typename PacketType,typename IndexType>
struct has_unary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 1}; };
template<typename Scalar, typename PacketType,typename IndexType>
struct has_binary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
template<typename Scalar,typename IndexType>
struct has_nullary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 1}; };
template<typename Scalar,typename IndexType>
struct has_unary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
template<typename Scalar,typename IndexType>
struct has_binary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
#endif
} // end namespace internal
} // end namespace Eigen

View File

@@ -321,7 +321,7 @@ struct functor_traits<scalar_log1p_op<Scalar> > {
*/
template<typename Scalar> struct scalar_log10_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); }
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD_MATH(log10) return log10(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
};

View File

@@ -580,7 +580,7 @@ DoublePacket<Packet> padd(const DoublePacket<Packet> &a, const DoublePacket<Pack
}
template<typename Packet>
const DoublePacket<Packet>& predux4(const DoublePacket<Packet> &a)
const DoublePacket<Packet>& predux_downto4(const DoublePacket<Packet> &a)
{
return a;
}
@@ -972,7 +972,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
internal::prefetch(blA+(3*K+16)*LhsProgress); \
if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } /* Bug 953 */ \
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
@@ -1581,10 +1581,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
if(SwappedTraits::LhsProgress==8)
{
// Special case where we have to first reduce the accumulation register C0
typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SResPacket>::half,SResPacket>::type SResPacketHalf;
typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SLhsPacket>::half,SLhsPacket>::type SLhsPacketHalf;
typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SLhsPacket>::half,SRhsPacket>::type SRhsPacketHalf;
typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SAccPacket>::half,SAccPacket>::type SAccPacketHalf;
typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SResPacket>::half,SResPacket>::type SResPacketHalf;
typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SLhsPacket>::type SLhsPacketHalf;
typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SRhsPacket>::type SRhsPacketHalf;
typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SAccPacket>::half,SAccPacket>::type SAccPacketHalf;
SResPacketHalf R = res.template gatherPacket<SResPacketHalf>(i, j2);
SResPacketHalf alphav = pset1<SResPacketHalf>(alpha);
@@ -1596,13 +1596,13 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
SRhsPacketHalf b0;
straits.loadLhsUnaligned(blB, a0);
straits.loadRhs(blA, b0);
SAccPacketHalf c0 = predux4(C0);
SAccPacketHalf c0 = predux_downto4(C0);
straits.madd(a0,b0,c0,b0);
straits.acc(c0, alphav, R);
}
else
{
straits.acc(predux4(C0), alphav, R);
straits.acc(predux_downto4(C0), alphav, R);
}
res.scatterPacket(i, j2, R);
}
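For context, predux_downto4 (renamed from predux4 above) halves the accumulator width by adding its two halves; a hedged illustration for an 8-float accumulator:
// Illustrative only: predux_downto4({c0, c1, c2, c3, c4, c5, c6, c7})
//                 == {c0 + c4, c1 + c5, c2 + c6, c3 + c7}
// which is the 4-lane value the swapped LhsProgress==8 path needs before the final acc().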

View File

@@ -10,7 +10,7 @@
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
#define EIGEN_GENERAL_MATRIX_MATRIX_H
namespace Eigen {
namespace internal {
@@ -24,7 +24,7 @@ template<
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
{
typedef gebp_traits<RhsScalar,LhsScalar> Traits;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
@@ -54,7 +54,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
@@ -85,13 +85,13 @@ static void run(Index rows, Index cols, Index depth,
// this is the parallel version!
Index tid = omp_get_thread_num();
Index threads = omp_get_num_threads();
LhsScalar* blockA = blocking.blockA();
eigen_internal_assert(blockA!=0);
std::size_t sizeB = kc*nc;
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
for(Index k=0; k<depth; k+=kc)
{
@@ -114,7 +114,7 @@ static void run(Index rows, Index cols, Index depth,
// Notify the other threads that the part A'_i is ready to go.
info[tid].sync = k;
// Computes C_i += A' * B' per A'_i
for(Index shift=0; shift<threads; ++shift)
{
@@ -161,7 +161,7 @@ static void run(Index rows, Index cols, Index depth,
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
@@ -172,24 +172,24 @@ static void run(Index rows, Index cols, Index depth,
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
// OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
// => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
// Note that this panel will be read as many times as the number of blocks in the rhs's
// horizontal panel which is, in practice, a very low number.
pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
// For each kc x nc block of the rhs's horizontal panel...
for(Index j2=0; j2<cols; j2+=nc)
{
const Index actual_nc = (std::min)(j2+nc,cols)-j2;
// We pack the rhs's block into a sequential chunk of memory (L2 caching)
// Note that this block will be read a very high number of times, which is equal to the number of
// micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
if((!pack_rhs_once) || i2==0)
pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
// Everything is packed, we can now call the panel * block kernel:
gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
}
@@ -229,7 +229,7 @@ struct gemm_functor
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
m_actualAlpha, m_blocking, info);
}
typedef typename Gemm::Traits Traits;
protected:
@@ -313,7 +313,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
#endif
}
void initParallel(Index, Index, Index, Index)
{}
@@ -359,14 +359,14 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
m_sizeA = this->m_mc * this->m_kc;
m_sizeB = this->m_kc * this->m_nc;
}
void initParallel(Index rows, Index cols, Index depth, Index num_threads)
{
this->m_mc = Transpose ? cols : rows;
this->m_nc = Transpose ? rows : cols;
this->m_kc = depth;
eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
Index m = this->m_mc;
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
m_sizeA = this->m_mc * this->m_kc;
@@ -401,7 +401,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
} // end namespace internal
namespace internal {
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
@@ -409,21 +409,21 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
enum {
MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
};
typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
template<typename Dst>
static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
@@ -453,7 +453,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
else
scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
}
template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
{
@@ -481,7 +481,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit);
(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
}
};

View File

@@ -10,7 +10,7 @@
#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
namespace Eigen {
namespace internal {
@@ -83,7 +83,7 @@ template<typename Index> struct GemmParallelInfo
};
template<bool Condition, typename Functor, typename Index>
void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
{
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
@@ -92,6 +92,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole
// parallelizer mechanism has to be redesigned anyway.
EIGEN_UNUSED_VARIABLE(depth);
EIGEN_UNUSED_VARIABLE(transpose);
func(0,rows, 0,cols);
#else
@@ -106,6 +107,12 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
// FIXME this has to be fine tuned
Index size = transpose ? rows : cols;
Index pb_max_threads = std::max<Index>(1,size / 32);
// compute the maximal number of threads from the total amount of work:
double work = static_cast<double>(rows) * static_cast<double>(cols) *
static_cast<double>(depth);
double kMinTaskSize = 50000; // Heuristic.
pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
// compute the number of threads we are going to use
Index threads = std::min<Index>(nbThreads(), pb_max_threads);
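A hedged standalone sketch of the resulting thread cap (gemm_thread_cap is a hypothetical helper, not part of the patch; it folds the size/32 and work/kMinTaskSize caps into one function and assumes <algorithm>):
#include <algorithm>
// Illustrative only: mirrors the two caps computed above (output size and total work).
inline int gemm_thread_cap(double rows, double cols, double depth) {
  double work = rows * cols * depth;
  double kMinTaskSize = 50000;                       // same heuristic constant as above
  int pb_max_threads = std::max(1, int(cols / 32));  // cap from the output size (cols, for simplicity)
  return std::max(1, std::min(pb_max_threads, int(work / kMinTaskSize)));
}
// gemm_thread_cap(256, 256, 4)   == 5  (small depth: the work term is the binding cap)
// gemm_thread_cap(256, 256, 256) == 8  (here size/32 is the binding cap)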
@@ -120,19 +127,19 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
if(transpose)
std::swap(rows,cols);
ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
#pragma omp parallel num_threads(threads)
{
Index i = omp_get_thread_num();
// Note that the actual number of threads might be lower than the number of requested ones.
Index actual_threads = omp_get_num_threads();
Index blockCols = (cols / actual_threads) & ~Index(0x3);
Index blockRows = (rows / actual_threads);
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
Index r0 = i*blockRows;
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;

View File

@@ -14,12 +14,13 @@
// 4512 - assignment operator could not be generated
// 4522 - 'class' : multiple assignment operators specified
// 4700 - uninitialized local variable 'xyz' used
// 4714 - function marked as __forceinline not inlined
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
// 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning( push )
#endif
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800)
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
@@ -67,6 +68,8 @@
#pragma diag_suppress 2669
#pragma diag_suppress 2670
#pragma diag_suppress 2671
#pragma diag_suppress 2735
#pragma diag_suppress 2737
#endif
#endif // not EIGEN_WARNINGS_DISABLED

View File

@@ -12,8 +12,8 @@
#define EIGEN_MACROS_H
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 2
#define EIGEN_MINOR_VERSION 94
#define EIGEN_MAJOR_VERSION 3
#define EIGEN_MINOR_VERSION 1
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -356,6 +356,13 @@
#define EIGEN_MAX_CPP_VER 99
#endif
#if EIGEN_MAX_CPP_VER>=11 && defined(__cplusplus) && (__cplusplus >= 201103L)
#define EIGEN_HAS_CXX11 1
#else
#define EIGEN_HAS_CXX11 0
#endif
// Do we support r-value references?
#ifndef EIGEN_HAS_RVALUE_REFERENCES
#if EIGEN_MAX_CPP_VER>=11 && \
@@ -392,8 +399,8 @@
// Does the compiler support variadic templates?
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
&& ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 )
// ^^ Disable the use of variadic templates when compiling with nvcc on ARM devices:
&& ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
// this prevents nvcc from crashing when compiling Eigen on Tegra X1
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
#else
@@ -652,6 +659,9 @@ namespace Eigen {
// If the user explicitly disable vectorization, then we also disable alignment
#if defined(EIGEN_DONT_VECTORIZE)
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
#elif defined(EIGEN_VECTORIZE_AVX512)
// 64 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
#elif defined(__AVX__)
// 32 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
@@ -801,7 +811,7 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY
#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || __CUDACC_VER__) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)

View File

@@ -523,7 +523,7 @@ template<typename T> struct smart_memmove_helper<T,true> {
template<typename T> struct smart_memmove_helper<T,false> {
static inline void run(const T* start, const T* end, T* target)
{
if (uintptr_t(target) < uintptr_t(start))
if (UIntPtr(target) < UIntPtr(start))
{
std::copy(start, end, target);
}
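The UIntPtr comparison picks the copy direction for overlapping ranges; a hedged illustration (using the surrounding smart_memmove entry point):
// Illustrative only: shifting 4 elements one slot to the left overlaps, and target < start,
// so the forward std::copy above is the safe direction.
//   int buf[5] = { 1, 2, 3, 4, 5 };
//   smart_memmove(buf + 1, buf + 5, buf);   // buf becomes { 2, 3, 4, 5, 5 }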

View File

@@ -381,12 +381,12 @@ struct has_ReturnType
enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) };
};
template<typename T> const T& return_ref();
template<typename T> const T* return_ptr();
template <typename T, typename IndexType=Index>
struct has_nullary_operator
{
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()())>0)>::type * = 0);
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()())>0)>::type * = 0);
static meta_no testFunctor(...);
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
@@ -395,7 +395,7 @@ struct has_nullary_operator
template <typename T, typename IndexType=Index>
struct has_unary_operator
{
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0)))>0)>::type * = 0);
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0)))>0)>::type * = 0);
static meta_no testFunctor(...);
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
@@ -404,7 +404,7 @@ struct has_unary_operator
template <typename T, typename IndexType=Index>
struct has_binary_operator
{
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
static meta_no testFunctor(...);
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };

View File

@@ -100,7 +100,8 @@
MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
THIS_TYPE_IS_NOT_SUPPORTED,
STORAGE_KIND_MUST_MATCH,
STORAGE_INDEX_MUST_MATCH
STORAGE_INDEX_MUST_MATCH,
CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
};
};

View File

@@ -191,7 +191,7 @@ struct find_best_packet
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
template<int ArrayBytes, int AlignmentBytes,
bool Match = bool((ArrayBytes%AlignmentBytes)==0),
bool TryHalf = bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) >
bool TryHalf = bool(EIGEN_MIN_ALIGN_BYTES<AlignmentBytes) >
struct compute_default_alignment_helper
{
enum { value = 0 };
@@ -445,15 +445,11 @@ template<typename T, int n, typename PlainObject = typename plain_object_eval<T>
// Another solution could be to count the number of temps?
NAsInteger = n == Dynamic ? HugeCost : n,
CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
CostNoEval = NAsInteger * CoeffReadCost
CostNoEval = NAsInteger * CoeffReadCost,
Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
};
typedef typename conditional<
( (int(evaluator<T>::Flags) & EvalBeforeNestingBit) ||
(int(CostEval) < int(CostNoEval)) ),
PlainObject,
typename ref_selector<T>::type
>::type type;
typedef typename conditional<Evaluate, PlainObject, typename ref_selector<T>::type>::type type;
};
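A hedged numeric reading of the Evaluate rule above (taking ScalarReadCost = 1):
// Illustrative only: an expression with CoeffReadCost = 5 that is nested n = 3 times gives
//   CostEval   = (3+1)*1 + 5 = 9  <  CostNoEval = 3*5 = 15   -> evaluate into PlainObject;
// a plain matrix (CoeffReadCost = 1) gives 5 >= 3, so it is referenced via ref_selector instead.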
template<typename T>

View File

@@ -62,57 +62,57 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Default constructor initializing a null box. */
inline AlignedBox()
EIGEN_DEVICE_FUNC inline AlignedBox()
{ if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); }
/** Constructs a null box with \a _dim the dimension of the ambient space. */
inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
{ setEmpty(); }
/** Constructs a box with extremities \a _min and \a _max.
* \warning If either component of \a _min is larger than the same component of \a _max, the constructed box is empty. */
template<typename OtherVectorType1, typename OtherVectorType2>
inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}
EIGEN_DEVICE_FUNC inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}
/** Constructs a box containing a single point \a p. */
template<typename Derived>
inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
EIGEN_DEVICE_FUNC inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
{ }
~AlignedBox() {}
EIGEN_DEVICE_FUNC ~AlignedBox() {}
/** \returns the dimension in which the box holds */
inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }
EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }
/** \deprecated use isEmpty() */
inline bool isNull() const { return isEmpty(); }
EIGEN_DEVICE_FUNC inline bool isNull() const { return isEmpty(); }
/** \deprecated use setEmpty() */
inline void setNull() { setEmpty(); }
EIGEN_DEVICE_FUNC inline void setNull() { setEmpty(); }
/** \returns true if the box is empty.
* \sa setEmpty */
inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }
EIGEN_DEVICE_FUNC inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }
/** Makes \c *this an empty box.
* \sa isEmpty */
inline void setEmpty()
EIGEN_DEVICE_FUNC inline void setEmpty()
{
m_min.setConstant( ScalarTraits::highest() );
m_max.setConstant( ScalarTraits::lowest() );
}
/** \returns the minimal corner */
inline const VectorType& (min)() const { return m_min; }
EIGEN_DEVICE_FUNC inline const VectorType& (min)() const { return m_min; }
/** \returns a non const reference to the minimal corner */
inline VectorType& (min)() { return m_min; }
EIGEN_DEVICE_FUNC inline VectorType& (min)() { return m_min; }
/** \returns the maximal corner */
inline const VectorType& (max)() const { return m_max; }
EIGEN_DEVICE_FUNC inline const VectorType& (max)() const { return m_max; }
/** \returns a non const reference to the maximal corner */
inline VectorType& (max)() { return m_max; }
EIGEN_DEVICE_FUNC inline VectorType& (max)() { return m_max; }
/** \returns the center of the box */
inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)
EIGEN_DEVICE_FUNC inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)
center() const
{ return (m_min+m_max)/RealScalar(2); }
@@ -120,18 +120,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* Note that this function does not get the same
* result for integral or floating scalar types: see
*/
inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const
EIGEN_DEVICE_FUNC inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const
{ return m_max - m_min; }
/** \returns the volume of the bounding box */
inline Scalar volume() const
EIGEN_DEVICE_FUNC inline Scalar volume() const
{ return sizes().prod(); }
/** \returns an expression for the bounding box diagonal vector
* if the length of the diagonal is needed: diagonal().norm()
* will provide it.
*/
inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const
EIGEN_DEVICE_FUNC inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const
{ return sizes(); }
/** \returns the vertex of the bounding box at the corner defined by
@@ -143,7 +143,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* For 3D bounding boxes, the following names are added:
* BottomLeftCeil, BottomRightCeil, TopLeftCeil, TopRightCeil.
*/
inline VectorType corner(CornerType corner) const
EIGEN_DEVICE_FUNC inline VectorType corner(CornerType corner) const
{
EIGEN_STATIC_ASSERT(_AmbientDim <= 3, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE);
@@ -161,7 +161,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** \returns a random point inside the bounding box sampled with
* a uniform distribution */
inline VectorType sample() const
EIGEN_DEVICE_FUNC inline VectorType sample() const
{
VectorType r(dim());
for(Index d=0; d<dim(); ++d)
@@ -179,25 +179,25 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** \returns true if the point \a p is inside the box \c *this. */
template<typename Derived>
inline bool contains(const MatrixBase<Derived>& p) const
EIGEN_DEVICE_FUNC inline bool contains(const MatrixBase<Derived>& p) const
{
typename internal::nested_eval<Derived,2>::type p_n(p.derived());
return (m_min.array()<=p_n.array()).all() && (p_n.array()<=m_max.array()).all();
}
/** \returns true if the box \a b is entirely inside the box \c *this. */
inline bool contains(const AlignedBox& b) const
EIGEN_DEVICE_FUNC inline bool contains(const AlignedBox& b) const
{ return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); }
/** \returns true if the box \a b is intersecting the box \c *this.
* \sa intersection, clamp */
inline bool intersects(const AlignedBox& b) const
EIGEN_DEVICE_FUNC inline bool intersects(const AlignedBox& b) const
{ return (m_min.array()<=(b.max)().array()).all() && ((b.min)().array()<=m_max.array()).all(); }
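Aside (illustrative sketch, not from the patch): how contains() and intersects() relate for two boxes, assuming the AlignedBox2f typedef and the min/max-corner constructor.

#include <Eigen/Geometry>
#include <cassert>

int main()
{
  using Eigen::AlignedBox2f;
  using Eigen::Vector2f;
  AlignedBox2f a(Vector2f(0.f, 0.f), Vector2f(4.f, 4.f));  // box given by its min and max corners
  AlignedBox2f b(Vector2f(1.f, 1.f), Vector2f(2.f, 2.f));  // entirely inside a
  AlignedBox2f c(Vector2f(3.f, 3.f), Vector2f(5.f, 5.f));  // overlaps a but is not contained in it
  assert(a.contains(Vector2f(2.f, 2.f)));
  assert(a.contains(b));
  assert(a.intersects(c) && !a.contains(c));
}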
/** Extends \c *this such that it contains the point \a p and returns a reference to \c *this.
* \sa extend(const AlignedBox&) */
template<typename Derived>
inline AlignedBox& extend(const MatrixBase<Derived>& p)
EIGEN_DEVICE_FUNC inline AlignedBox& extend(const MatrixBase<Derived>& p)
{
typename internal::nested_eval<Derived,2>::type p_n(p.derived());
m_min = m_min.cwiseMin(p_n);
@@ -207,7 +207,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Extends \c *this such that it contains the box \a b and returns a reference to \c *this.
* \sa merged, extend(const MatrixBase&) */
inline AlignedBox& extend(const AlignedBox& b)
EIGEN_DEVICE_FUNC inline AlignedBox& extend(const AlignedBox& b)
{
m_min = m_min.cwiseMin(b.m_min);
m_max = m_max.cwiseMax(b.m_max);
@@ -217,7 +217,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Clamps \c *this by the box \a b and returns a reference to \c *this.
* \note If the boxes don't intersect, the resulting box is empty.
* \sa intersection(), intersects() */
inline AlignedBox& clamp(const AlignedBox& b)
EIGEN_DEVICE_FUNC inline AlignedBox& clamp(const AlignedBox& b)
{
m_min = m_min.cwiseMax(b.m_min);
m_max = m_max.cwiseMin(b.m_max);
@@ -227,18 +227,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Returns an AlignedBox that is the intersection of \a b and \c *this
* \note If the boxes don't intersect, the resulting box is empty.
* \sa intersects(), clamp, contains() */
inline AlignedBox intersection(const AlignedBox& b) const
EIGEN_DEVICE_FUNC inline AlignedBox intersection(const AlignedBox& b) const
{return AlignedBox(m_min.cwiseMax(b.m_min), m_max.cwiseMin(b.m_max)); }
/** Returns an AlignedBox that is the union of \a b and \c *this.
* \note Merging with an empty box may result in a box bigger than \c *this.
* \sa extend(const AlignedBox&) */
inline AlignedBox merged(const AlignedBox& b) const
EIGEN_DEVICE_FUNC inline AlignedBox merged(const AlignedBox& b) const
{ return AlignedBox(m_min.cwiseMin(b.m_min), m_max.cwiseMax(b.m_max)); }
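Aside (hedged sketch, not from the patch): intersection() of disjoint boxes is empty, while merged() returns the smallest box enclosing both operands.

#include <Eigen/Geometry>
#include <cassert>

int main()
{
  using Eigen::AlignedBox2f;
  using Eigen::Vector2f;
  AlignedBox2f a(Vector2f(0.f, 0.f), Vector2f(2.f, 2.f));
  AlignedBox2f b(Vector2f(3.f, 3.f), Vector2f(4.f, 4.f));  // disjoint from a
  assert(a.intersection(b).isEmpty());                     // no overlap -> empty box
  AlignedBox2f u = a.merged(b);                            // bounding box of the union
  assert(u.contains(a) && u.contains(b));
}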
/** Translates \c *this by the vector \a t and returns a reference to \c *this. */
template<typename Derived>
inline AlignedBox& translate(const MatrixBase<Derived>& a_t)
EIGEN_DEVICE_FUNC inline AlignedBox& translate(const MatrixBase<Derived>& a_t)
{
const typename internal::nested_eval<Derived,2>::type t(a_t.derived());
m_min += t;
@@ -251,28 +251,28 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* \sa exteriorDistance(const MatrixBase&), squaredExteriorDistance(const AlignedBox&)
*/
template<typename Derived>
inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;
EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;
/** \returns the squared distance between the boxes \a b and \c *this,
* and zero if the boxes intersect.
* \sa exteriorDistance(const AlignedBox&), squaredExteriorDistance(const MatrixBase&)
*/
inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
/** \returns the distance between the point \a p and the box \c *this,
* and zero if \a p is inside the box.
* \sa squaredExteriorDistance(const MatrixBase&), exteriorDistance(const AlignedBox&)
*/
template<typename Derived>
inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
{ using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(p))); }
EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
{ EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(p))); }
/** \returns the distance between the boxes \a b and \c *this,
* and zero if the boxes intersect.
* \sa squaredExteriorDistance(const AlignedBox&), exteriorDistance(const MatrixBase&)
*/
inline NonInteger exteriorDistance(const AlignedBox& b) const
{ using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(b))); }
EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const AlignedBox& b) const
{ EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(b))); }
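Aside (illustrative values, not from the patch): the exterior distance is zero for points inside the box and measures the gap to the closest face otherwise.

#include <Eigen/Geometry>
#include <cassert>

int main()
{
  Eigen::AlignedBox3f box(Eigen::Vector3f(0.f, 0.f, 0.f), Eigen::Vector3f(1.f, 1.f, 1.f));
  assert(box.exteriorDistance(Eigen::Vector3f(0.5f, 0.5f, 0.5f)) == 0.f);        // point inside the box
  assert(box.squaredExteriorDistance(Eigen::Vector3f(2.f, 0.5f, 0.5f)) == 1.f);  // 1 unit outside along x
}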
/** \returns \c *this with scalar type cast to \a NewScalarType
*
@@ -280,7 +280,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
inline typename internal::cast_return_type<AlignedBox,
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AlignedBox,
AlignedBox<NewScalarType,AmbientDimAtCompileTime> >::type cast() const
{
return typename internal::cast_return_type<AlignedBox,
@@ -289,7 +289,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
EIGEN_DEVICE_FUNC inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
{
m_min = (other.min)().template cast<Scalar>();
m_max = (other.max)().template cast<Scalar>();
@@ -299,7 +299,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
EIGEN_DEVICE_FUNC bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
{ return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); }
protected:
@@ -311,7 +311,7 @@ protected:
template<typename Scalar,int AmbientDim>
template<typename Derived>
inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
EIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
{
typename internal::nested_eval<Derived,2*AmbientDim>::type p(a_p.derived());
Scalar dist2(0);
@@ -333,7 +333,7 @@ inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const Matri
}
template<typename Scalar,int AmbientDim>
inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const
EIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const
{
Scalar dist2(0);
Scalar aux;

View File

@@ -69,59 +69,61 @@ protected:
public:
/** Default constructor without initialization. */
AngleAxis() {}
EIGEN_DEVICE_FUNC AngleAxis() {}
/** Constructs and initializes the angle-axis rotation from an \a angle in radians
* and an \a axis which \b must \b be \b normalized.
*
* \warning If the \a axis vector is not normalized, then the angle-axis object
* represents an invalid rotation. */
template<typename Derived>
EIGEN_DEVICE_FUNC
inline AngleAxis(const Scalar& angle, const MatrixBase<Derived>& axis) : m_axis(axis), m_angle(angle) {}
/** Constructs and initializes the angle-axis rotation from a quaternion \a q.
* This function implicitly normalizes the quaternion \a q.
*/
template<typename QuatDerived> inline explicit AngleAxis(const QuaternionBase<QuatDerived>& q) { *this = q; }
template<typename QuatDerived>
EIGEN_DEVICE_FUNC inline explicit AngleAxis(const QuaternionBase<QuatDerived>& q) { *this = q; }
/** Constructs and initializes the angle-axis rotation from a 3x3 rotation matrix. */
template<typename Derived>
inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }
EIGEN_DEVICE_FUNC inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }
/** \returns the value of the rotation angle in radians */
Scalar angle() const { return m_angle; }
EIGEN_DEVICE_FUNC Scalar angle() const { return m_angle; }
/** \returns a read-write reference to the stored angle in radians */
Scalar& angle() { return m_angle; }
EIGEN_DEVICE_FUNC Scalar& angle() { return m_angle; }
/** \returns the rotation axis */
const Vector3& axis() const { return m_axis; }
EIGEN_DEVICE_FUNC const Vector3& axis() const { return m_axis; }
/** \returns a read-write reference to the stored rotation axis.
*
* \warning The rotation axis must remain a \b unit vector.
*/
Vector3& axis() { return m_axis; }
EIGEN_DEVICE_FUNC Vector3& axis() { return m_axis; }
/** Concatenates two rotations */
inline QuaternionType operator* (const AngleAxis& other) const
EIGEN_DEVICE_FUNC inline QuaternionType operator* (const AngleAxis& other) const
{ return QuaternionType(*this) * QuaternionType(other); }
/** Concatenates two rotations */
inline QuaternionType operator* (const QuaternionType& other) const
EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& other) const
{ return QuaternionType(*this) * other; }
/** Concatenates two rotations */
friend inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
friend EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
{ return a * QuaternionType(b); }
/** \returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */
AngleAxis inverse() const
EIGEN_DEVICE_FUNC AngleAxis inverse() const
{ return AngleAxis(-m_angle, m_axis); }
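Aside (hedged usage sketch, not from the patch): concatenating two angle-axis rotations yields a quaternion, and inverse() flips the sign of the angle; the AngleAxisf and Quaternionf typedefs are assumed.

#include <Eigen/Geometry>
#include <iostream>

int main()
{
  const float pi = 3.14159265358979f;
  Eigen::AngleAxisf rx(0.25f * pi, Eigen::Vector3f::UnitX());
  Eigen::AngleAxisf rz(0.50f * pi, Eigen::Vector3f::UnitZ());
  Eigen::Quaternionf q = rx * rz;                              // concatenation returns a quaternion
  std::cout << (q * Eigen::Vector3f(1.f, 0.f, 0.f)).transpose() << "\n";  // rotate a vector
  Eigen::Quaternionf id = q * (rz.inverse() * rx.inverse());   // composes back to ~identity
  std::cout << id.coeffs().transpose() << "\n";                // approximately 0 0 0 1
}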
template<class QuatDerived>
AngleAxis& operator=(const QuaternionBase<QuatDerived>& q);
EIGEN_DEVICE_FUNC AngleAxis& operator=(const QuaternionBase<QuatDerived>& q);
template<typename Derived>
AngleAxis& operator=(const MatrixBase<Derived>& m);
EIGEN_DEVICE_FUNC AngleAxis& operator=(const MatrixBase<Derived>& m);
template<typename Derived>
AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);
Matrix3 toRotationMatrix(void) const;
EIGEN_DEVICE_FUNC AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);
EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix(void) const;
/** \returns \c *this with scalar type cast to \a NewScalarType
*
@@ -129,24 +131,24 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
inline typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const
{ return typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type(*this); }
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)
EIGEN_DEVICE_FUNC inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)
{
m_axis = other.axis().template cast<Scalar>();
m_angle = Scalar(other.angle());
}
static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }
EIGEN_DEVICE_FUNC static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
bool isApprox(const AngleAxis& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
EIGEN_DEVICE_FUNC bool isApprox(const AngleAxis& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_axis.isApprox(other.m_axis, prec) && internal::isApprox(m_angle,other.m_angle, prec); }
};
@@ -158,21 +160,26 @@ typedef AngleAxis<float> AngleAxisf;
typedef AngleAxis<double> AngleAxisd;
/** Set \c *this from a \b unit quaternion.
* The resulting axis is normalized.
*
* The resulting axis is normalized, and the computed angle is in the [0,pi] range.
*
* This function implicitly normalizes the quaternion \a q.
*/
template<typename Scalar>
template<typename QuatDerived>
AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
{
using std::atan2;
EIGEN_USING_STD_MATH(atan2)
EIGEN_USING_STD_MATH(abs)
Scalar n = q.vec().norm();
if(n<NumTraits<Scalar>::epsilon())
n = q.vec().stableNorm();
if (n > Scalar(0))
if (n != Scalar(0))
{
m_angle = Scalar(2)*atan2(n, q.w());
m_angle = Scalar(2)*atan2(n, abs(q.w()));
if(q.w() < 0)
n = -n;
m_axis = q.vec() / n;
}
else
@@ -187,7 +194,7 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived
*/
template<typename Scalar>
template<typename Derived>
AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
{
// Since a direct conversion would not really be faster,
// let's use the robust Quaternion implementation:
@@ -199,7 +206,7 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
**/
template<typename Scalar>
template<typename Derived>
AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
{
return *this = QuaternionType(mat);
}
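Aside (not from the patch): fromRotationMatrix() goes through the robust quaternion conversion mentioned in the comment just above; a hedged round-trip sketch.

#include <Eigen/Geometry>
#include <iostream>

int main()
{
  const float pi = 3.14159265358979f;
  Eigen::Matrix3f R = Eigen::AngleAxisf(0.3f * pi, Eigen::Vector3f::UnitY()).toRotationMatrix();
  Eigen::AngleAxisf aa;
  aa.fromRotationMatrix(R);                                          // internally: R -> Quaternionf -> angle/axis
  std::cout << aa.angle() << "  " << aa.axis().transpose() << "\n";  // ~0.3*pi and ~(0, 1, 0)
}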
@@ -208,10 +215,10 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derive
*/
template<typename Scalar>
typename AngleAxis<Scalar>::Matrix3
AngleAxis<Scalar>::toRotationMatrix(void) const
EIGEN_DEVICE_FUNC AngleAxis<Scalar>::toRotationMatrix(void) const
{
using std::sin;
using std::cos;
EIGEN_USING_STD_MATH(sin)
EIGEN_USING_STD_MATH(cos)
Matrix3 res;
Vector3 sin_axis = sin(m_angle) * m_axis;
Scalar c = cos(m_angle);

View File

@@ -33,12 +33,12 @@ namespace Eigen {
* \sa class AngleAxis
*/
template<typename Derived>
inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>
EIGEN_DEVICE_FUNC inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>
MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
{
using std::atan2;
using std::sin;
using std::cos;
EIGEN_USING_STD_MATH(atan2)
EIGEN_USING_STD_MATH(sin)
EIGEN_USING_STD_MATH(cos)
/* Implemented from Graphics Gems IV */
EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived,3,3)

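Aside (hedged sketch, not from the patch): typical use of eulerAngles(), decomposing a rotation built with the same axis convention; the recovered angles are only expected to match approximately and within the documented ranges.

#include <Eigen/Geometry>
#include <iostream>

int main()
{
  Eigen::Matrix3f R = (Eigen::AngleAxisf(0.3f, Eigen::Vector3f::UnitX())
                     * Eigen::AngleAxisf(0.2f, Eigen::Vector3f::UnitY())
                     * Eigen::AngleAxisf(0.1f, Eigen::Vector3f::UnitZ())).toRotationMatrix();
  Eigen::Vector3f ea = R.eulerAngles(0, 1, 2);   // axes X, Y, Z in that order
  std::cout << ea.transpose() << "\n";           // approximately 0.3 0.2 0.1
}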
View File

@@ -68,17 +68,17 @@ template<typename MatrixType,int _Direction> class Homogeneous
typedef MatrixBase<Homogeneous> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Homogeneous)
explicit inline Homogeneous(const MatrixType& matrix)
EIGEN_DEVICE_FUNC explicit inline Homogeneous(const MatrixType& matrix)
: m_matrix(matrix)
{}
inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 1 : 0); }
inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 1 : 0); }
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
const NestedExpression& nestedExpression() const { return m_matrix; }
EIGEN_DEVICE_FUNC const NestedExpression& nestedExpression() const { return m_matrix; }
template<typename Rhs>
inline const Product<Homogeneous,Rhs>
EIGEN_DEVICE_FUNC inline const Product<Homogeneous,Rhs>
operator* (const MatrixBase<Rhs>& rhs) const
{
eigen_assert(int(Direction)==Horizontal);
@@ -86,7 +86,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
}
template<typename Lhs> friend
inline const Product<Lhs,Homogeneous>
EIGEN_DEVICE_FUNC inline const Product<Lhs,Homogeneous>
operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs)
{
eigen_assert(int(Direction)==Vertical);
@@ -94,7 +94,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
}
template<typename Scalar, int Dim, int Mode, int Options> friend
inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
EIGEN_DEVICE_FUNC inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
operator* (const Transform<Scalar,Dim,Mode,Options>& lhs, const Homogeneous& rhs)
{
eigen_assert(int(Direction)==Vertical);
@@ -102,7 +102,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
}
template<typename Func>
EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar,Scalar)>::type
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar,Scalar)>::type
redux(const Func& func) const
{
return func(m_matrix.redux(func), Scalar(1));
@@ -114,7 +114,9 @@ template<typename MatrixType,int _Direction> class Homogeneous
/** \geometry_module \ingroup Geometry_Module
*
* \return an expression of the equivalent homogeneous vector
* \returns a vector expression that is one longer than the vector argument, with the value 1 symbolically appended as the last coefficient.
*
* This can be used to convert affine coordinates to homogeneous coordinates.
*
* \only_for_vectors
*
@@ -124,7 +126,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
* \sa VectorwiseOp::homogeneous(), class Homogeneous
*/
template<typename Derived>
inline typename MatrixBase<Derived>::HomogeneousReturnType
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::HomogeneousReturnType
MatrixBase<Derived>::homogeneous() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
@@ -133,14 +135,16 @@ MatrixBase<Derived>::homogeneous() const
/** \geometry_module \ingroup Geometry_Module
*
* \returns a matrix expression of homogeneous column (or row) vectors
* \returns an expression where the value 1 is symbolically appended as the final coefficient to each column (or row) of the matrix.
*
* This can be used to convert affine coordinates to homogeneous coordinates.
*
* Example: \include VectorwiseOp_homogeneous.cpp
* Output: \verbinclude VectorwiseOp_homogeneous.out
*
* \sa MatrixBase::homogeneous(), class Homogeneous */
template<typename ExpressionType, int Direction>
inline Homogeneous<ExpressionType,Direction>
EIGEN_DEVICE_FUNC inline Homogeneous<ExpressionType,Direction>
VectorwiseOp<ExpressionType,Direction>::homogeneous() const
{
return HomogeneousReturnType(_expression());
@@ -148,14 +152,23 @@ VectorwiseOp<ExpressionType,Direction>::homogeneous() const
/** \geometry_module \ingroup Geometry_Module
*
* \returns an expression of the homogeneous normalized vector of \c *this
* \brief homogeneous normalization
*
* \returns a vector expression of the N-1 first coefficients of \c *this divided by that last coefficient.
*
* This can be used to convert homogeneous coordinates to affine coordinates.
*
* It is essentially a shortcut for:
* \code
this->head(this->size()-1)/this->coeff(this->size()-1);
\endcode
*
* Example: \include MatrixBase_hnormalized.cpp
* Output: \verbinclude MatrixBase_hnormalized.out
*
* \sa VectorwiseOp::hnormalized() */
template<typename Derived>
inline const typename MatrixBase<Derived>::HNormalizedReturnType
EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::HNormalizedReturnType
MatrixBase<Derived>::hnormalized() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
@@ -166,14 +179,20 @@ MatrixBase<Derived>::hnormalized() const
/** \geometry_module \ingroup Geometry_Module
*
* \returns an expression of the homogeneous normalized vector of \c *this
* \brief column or row-wise homogeneous normalization
*
* \returns an expression of the first N-1 coefficients of each column (or row) of \c *this divided by the last coefficient of each column (or row).
*
* This can be used to convert homogeneous coordinates to affine coordinates.
*
* It is conceptually equivalent to calling MatrixBase::hnormalized() on each column (or row) of \c *this.
*
* Example: \include DirectionWise_hnormalized.cpp
* Output: \verbinclude DirectionWise_hnormalized.out
*
* \sa MatrixBase::hnormalized() */
template<typename ExpressionType, int Direction>
inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
EIGEN_DEVICE_FUNC inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
VectorwiseOp<ExpressionType,Direction>::hnormalized() const
{
return HNormalized_Block(_expression(),0,0,
@@ -197,7 +216,7 @@ template<typename MatrixOrTransformType>
struct take_matrix_for_product
{
typedef MatrixOrTransformType type;
static const type& run(const type &x) { return x; }
EIGEN_DEVICE_FUNC static const type& run(const type &x) { return x; }
};
template<typename Scalar, int Dim, int Mode,int Options>
@@ -205,7 +224,7 @@ struct take_matrix_for_product<Transform<Scalar, Dim, Mode, Options> >
{
typedef Transform<Scalar, Dim, Mode, Options> TransformType;
typedef typename internal::add_const<typename TransformType::ConstAffinePart>::type type;
static type run (const TransformType& x) { return x.affine(); }
EIGEN_DEVICE_FUNC static type run (const TransformType& x) { return x.affine(); }
};
template<typename Scalar, int Dim, int Options>
@@ -213,7 +232,7 @@ struct take_matrix_for_product<Transform<Scalar, Dim, Projective, Options> >
{
typedef Transform<Scalar, Dim, Projective, Options> TransformType;
typedef typename TransformType::MatrixType type;
static const type& run (const TransformType& x) { return x.matrix(); }
EIGEN_DEVICE_FUNC static const type& run (const TransformType& x) { return x.matrix(); }
};
template<typename MatrixType,typename Lhs>
@@ -238,15 +257,15 @@ struct homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs>
typedef typename traits<homogeneous_left_product_impl>::LhsMatrixType LhsMatrixType;
typedef typename remove_all<LhsMatrixType>::type LhsMatrixTypeCleaned;
typedef typename remove_all<typename LhsMatrixTypeCleaned::Nested>::type LhsMatrixTypeNested;
homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
EIGEN_DEVICE_FUNC homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
: m_lhs(take_matrix_for_product<Lhs>::run(lhs)),
m_rhs(rhs)
{}
inline Index rows() const { return m_lhs.rows(); }
inline Index cols() const { return m_rhs.cols(); }
EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
template<typename Dest> void evalTo(Dest& dst) const
template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
{
// FIXME investigate how to allow lazy evaluation of this product when possible
dst = Block<const LhsMatrixTypeNested,
@@ -277,14 +296,14 @@ struct homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>
: public ReturnByValue<homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs> >
{
typedef typename remove_all<typename Rhs::Nested>::type RhsNested;
homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
EIGEN_DEVICE_FUNC homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{}
inline Index rows() const { return m_lhs.rows(); }
inline Index cols() const { return m_rhs.cols(); }
EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
template<typename Dest> void evalTo(Dest& dst) const
template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
{
// FIXME investigate how to allow lazy evaluation of this product when possible
dst = m_lhs * Block<const RhsNested,
@@ -317,7 +336,7 @@ struct unary_evaluator<Homogeneous<ArgType,Direction>, IndexBased>
typedef typename XprType::PlainObject PlainObject;
typedef evaluator<PlainObject> Base;
explicit unary_evaluator(const XprType& op)
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
: Base(), m_temp(op)
{
::new (static_cast<Base*>(this)) Base(m_temp);
@@ -332,8 +351,13 @@ template< typename DstXprType, typename ArgType, typename Scalar>
struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>
{
typedef Homogeneous<ArgType,Vertical> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
dst.template topRows<ArgType::RowsAtCompileTime>(src.nestedExpression().rows()) = src.nestedExpression();
dst.row(dst.rows()-1).setOnes();
}
@@ -344,8 +368,13 @@ template< typename DstXprType, typename ArgType, typename Scalar>
struct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>
{
typedef Homogeneous<ArgType,Horizontal> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
dst.template leftCols<ArgType::ColsAtCompileTime>(src.nestedExpression().cols()) = src.nestedExpression();
dst.col(dst.cols()-1).setOnes();
}
@@ -355,7 +384,7 @@ template<typename LhsArg, typename Rhs, int ProductTag>
struct generic_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs, HomogeneousShape, DenseShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
{
homogeneous_right_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst);
}
@@ -396,12 +425,24 @@ template<typename Lhs, typename RhsArg, int ProductTag>
struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
{
homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst);
}
};
// TODO: the following specialization is to address a regression from 3.2 to 3.3
// In the future, this path should be optimized.
template<typename Lhs, typename RhsArg, int ProductTag>
struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, TriangularShape, HomogeneousShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
{
dst.noalias() = lhs * rhs.eval();
}
};
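Aside (hedged, not from the patch): the fallback above targets products with a triangular view on the left and a homogeneous expression on the right, i.e. something along these lines.

#include <Eigen/Geometry>

int main()
{
  Eigen::Matrix4f M = Eigen::Matrix4f::Random();
  Eigen::Vector3f p(1.f, 2.f, 3.f);
  // TriangularShape (lhs) times HomogeneousShape (rhs): the specialization evaluates the rhs first.
  Eigen::Vector4f r = M.triangularView<Eigen::Upper>() * p.homogeneous();
  (void)r;
}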
template<typename Lhs,typename Rhs>
struct homogeneous_left_product_refactoring_helper
{
@@ -438,7 +479,7 @@ struct generic_product_impl<Transform<Scalar,Dim,Mode,Options>, Homogeneous<RhsA
{
typedef Transform<Scalar,Dim,Mode,Options> TransformType;
template<typename Dest>
static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
{
homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, TransformType>(lhs, rhs.nestedExpression()).evalTo(dst);
}
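Aside (not from the patch): the homogeneous()/hnormalized() pair documented in this file is typically used to push a vector through a 4x4 affine transform; a minimal sketch.

#include <Eigen/Geometry>
#include <iostream>

int main()
{
  Eigen::Vector3f p(1.f, 2.f, 4.f);
  Eigen::Matrix4f T = Eigen::Matrix4f::Identity();
  T(0, 3) = 5.f;                              // translation along x, stored in homogeneous form
  Eigen::Vector4f h = T * p.homogeneous();    // append 1, then apply the 4x4 matrix
  Eigen::Vector3f q = h.hnormalized();        // divide by the last coefficient: 6 2 4
  std::cout << q.transpose() << "\n";
}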

View File

@@ -50,21 +50,21 @@ public:
typedef const Block<const Coefficients,AmbientDimAtCompileTime,1> ConstNormalReturnType;
/** Default constructor without initialization */
inline Hyperplane() {}
EIGEN_DEVICE_FUNC inline Hyperplane() {}
template<int OtherOptions>
Hyperplane(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
EIGEN_DEVICE_FUNC Hyperplane(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
: m_coeffs(other.coeffs())
{}
/** Constructs a dynamic-size hyperplane with \a _dim the dimension
* of the ambient space */
inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {}
EIGEN_DEVICE_FUNC inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {}
/** Constructs a plane from its normal \a n and a point \a e on the plane.
* \warning the vector normal is assumed to be normalized.
*/
inline Hyperplane(const VectorType& n, const VectorType& e)
EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const VectorType& e)
: m_coeffs(n.size()+1)
{
normal() = n;
@@ -75,7 +75,7 @@ public:
* such that the algebraic equation of the plane is \f$ n \cdot x + d = 0 \f$.
* \warning the vector normal is assumed to be normalized.
*/
inline Hyperplane(const VectorType& n, const Scalar& d)
EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const Scalar& d)
: m_coeffs(n.size()+1)
{
normal() = n;
@@ -85,7 +85,7 @@ public:
/** Constructs a hyperplane passing through the two points. If the dimension of the ambient space
* is greater than 2, the hyperplane is not uniquely determined, so an arbitrary choice is made.
*/
static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
{
Hyperplane result(p0.size());
result.normal() = (p1 - p0).unitOrthogonal();
@@ -96,7 +96,7 @@ public:
/** Constructs a hyperplane passing through the three points. The dimension of the ambient space
* is required to be exactly 3.
*/
static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3)
Hyperplane result(p0.size());
@@ -120,19 +120,19 @@ public:
* so an arbitrary choice is made.
*/
// FIXME to be consistent with the rest, this could be implemented as a static Through function ??
explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
EIGEN_DEVICE_FUNC explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
{
normal() = parametrized.direction().unitOrthogonal();
offset() = -parametrized.origin().dot(normal());
}
~Hyperplane() {}
EIGEN_DEVICE_FUNC ~Hyperplane() {}
/** \returns the dimension in which the plane holds */
inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); }
EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); }
/** normalizes \c *this */
void normalize(void)
EIGEN_DEVICE_FUNC void normalize(void)
{
m_coeffs /= normal().norm();
}
@@ -140,45 +140,45 @@ public:
/** \returns the signed distance between the plane \c *this and a point \a p.
* \sa absDistance()
*/
inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); }
EIGEN_DEVICE_FUNC inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); }
/** \returns the absolute distance between the plane \c *this and a point \a p.
* \sa signedDistance()
*/
inline Scalar absDistance(const VectorType& p) const { using std::abs; return abs(signedDistance(p)); }
EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { return numext::abs(signedDistance(p)); }
/** \returns the projection of a point \a p onto the plane \c *this.
*/
inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
EIGEN_DEVICE_FUNC inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
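Aside (hedged sketch, not from the patch): a Hyperplane of a 2D space is a line; Through(), signedDistance(), absDistance() and projection() in action.

#include <Eigen/Geometry>
#include <iostream>

int main()
{
  typedef Eigen::Hyperplane<float, 2> Line2;
  Line2 l = Line2::Through(Eigen::Vector2f(0.f, 0.f), Eigen::Vector2f(1.f, 0.f));  // the x axis
  Eigen::Vector2f p(2.f, 3.f);
  std::cout << l.signedDistance(p) << "\n";          // +3 or -3, depending on the normal orientation
  std::cout << l.absDistance(p)    << "\n";          // 3
  std::cout << l.projection(p).transpose() << "\n";  // 2 0
}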
/** \returns a constant reference to the unit normal vector of the plane, which corresponds
* to the linear part of the implicit equation.
*/
inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); }
EIGEN_DEVICE_FUNC inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); }
/** \returns a non-constant reference to the unit normal vector of the plane, which corresponds
* to the linear part of the implicit equation.
*/
inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
EIGEN_DEVICE_FUNC inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
/** \returns the distance to the origin, which is also the "constant term" of the implicit equation
* \warning the vector normal is assumed to be normalized.
*/
inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
EIGEN_DEVICE_FUNC inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
/** \returns a non-constant reference to the distance to the origin, which is also the constant part
* of the implicit equation */
inline Scalar& offset() { return m_coeffs(dim()); }
EIGEN_DEVICE_FUNC inline Scalar& offset() { return m_coeffs(dim()); }
/** \returns a constant reference to the coefficients c_i of the plane equation:
* \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
*/
inline const Coefficients& coeffs() const { return m_coeffs; }
EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
/** \returns a non-constant reference to the coefficients c_i of the plane equation:
* \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
*/
inline Coefficients& coeffs() { return m_coeffs; }
EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
/** \returns the intersection of *this with \a other.
*
@@ -186,16 +186,15 @@ public:
*
* \note If \a other is approximately parallel to *this, this method will return any point on *this.
*/
VectorType intersection(const Hyperplane& other) const
EIGEN_DEVICE_FUNC VectorType intersection(const Hyperplane& other) const
{
using std::abs;
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0);
// since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests
// whether the two lines are approximately parallel.
if(internal::isMuchSmallerThan(det, Scalar(1)))
{ // special case where the two lines are approximately parallel. Pick any point on the first line.
if(abs(coeffs().coeff(1))>abs(coeffs().coeff(0)))
if(numext::abs(coeffs().coeff(1))>numext::abs(coeffs().coeff(0)))
return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0));
else
return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0));
@@ -215,7 +214,7 @@ public:
* or a more generic #Affine transformation. The default is #Affine.
*/
template<typename XprType>
inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
EIGEN_DEVICE_FUNC inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
{
if (traits==Affine)
normal() = mat.inverse().transpose() * normal();
@@ -236,7 +235,7 @@ public:
* Other kind of transformations are not supported.
*/
template<int TrOptions>
inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,
EIGEN_DEVICE_FUNC inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,
TransformTraits traits = Affine)
{
transform(t.linear(), traits);
@@ -250,7 +249,7 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
inline typename internal::cast_return_type<Hyperplane,
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Hyperplane,
Hyperplane<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const
{
return typename internal::cast_return_type<Hyperplane,
@@ -259,7 +258,7 @@ public:
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType,int OtherOptions>
inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
EIGEN_DEVICE_FUNC inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
{ m_coeffs = other.coeffs().template cast<Scalar>(); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
@@ -267,7 +266,7 @@ public:
*
* \sa MatrixBase::isApprox() */
template<int OtherOptions>
bool isApprox(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
EIGEN_DEVICE_FUNC bool isApprox(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_coeffs.isApprox(other.m_coeffs, prec); }
protected:

View File

@@ -27,7 +27,7 @@ namespace Eigen {
template<typename Derived>
template<typename OtherDerived>
#ifndef EIGEN_PARSED_BY_DOXYGEN
inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
#else
inline typename MatrixBase<Derived>::PlainObject
#endif
@@ -53,7 +53,7 @@ template< int Arch,typename VectorLhs,typename VectorRhs,
typename Scalar = typename VectorLhs::Scalar,
bool Vectorizable = bool((VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit)>
struct cross3_impl {
static inline typename internal::plain_matrix_type<VectorLhs>::type
EIGEN_DEVICE_FUNC static inline typename internal::plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)
{
return typename internal::plain_matrix_type<VectorLhs>::type(
@@ -78,7 +78,7 @@ struct cross3_impl {
*/
template<typename Derived>
template<typename OtherDerived>
inline typename MatrixBase<Derived>::PlainObject
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
@@ -105,6 +105,7 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
* \sa MatrixBase::cross() */
template<typename ExpressionType, int Direction>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
const typename VectorwiseOp<ExpressionType,Direction>::CrossReturnType
VectorwiseOp<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& other) const
{
@@ -221,7 +222,7 @@ struct unitOrthogonal_selector<Derived,2>
* \sa cross()
*/
template<typename Derived>
typename MatrixBase<Derived>::PlainObject
EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::unitOrthogonal() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)

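Aside (not from the patch): a minimal sketch of cross() and unitOrthogonal() on plain 3D vectors.

#include <Eigen/Geometry>
#include <iostream>

int main()
{
  Eigen::Vector3f x = Eigen::Vector3f::UnitX();
  Eigen::Vector3f y = Eigen::Vector3f::UnitY();
  std::cout << x.cross(y).transpose() << "\n";        // 0 0 1
  Eigen::Vector3f v(1.f, 2.f, 2.f);
  Eigen::Vector3f n = v.unitOrthogonal();             // unit vector orthogonal to v
  std::cout << v.dot(n) << "  " << n.norm() << "\n";  // ~0 and 1
}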
View File

@@ -41,45 +41,45 @@ public:
typedef Matrix<Scalar,AmbientDimAtCompileTime,1,Options> VectorType;
/** Default constructor without initialization */
inline ParametrizedLine() {}
EIGEN_DEVICE_FUNC inline ParametrizedLine() {}
template<int OtherOptions>
ParametrizedLine(const ParametrizedLine<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
EIGEN_DEVICE_FUNC ParametrizedLine(const ParametrizedLine<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
: m_origin(other.origin()), m_direction(other.direction())
{}
/** Constructs a dynamic-size line with \a _dim the dimension
* of the ambient space */
inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {}
EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {}
/** Initializes a parametrized line of direction \a direction and origin \a origin.
* \warning the vector direction is assumed to be normalized.
*/
ParametrizedLine(const VectorType& origin, const VectorType& direction)
EIGEN_DEVICE_FUNC ParametrizedLine(const VectorType& origin, const VectorType& direction)
: m_origin(origin), m_direction(direction) {}
template <int OtherOptions>
explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);
EIGEN_DEVICE_FUNC explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);
/** Constructs a parametrized line going from \a p0 to \a p1. */
static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)
EIGEN_DEVICE_FUNC static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)
{ return ParametrizedLine(p0, (p1-p0).normalized()); }
~ParametrizedLine() {}
EIGEN_DEVICE_FUNC ~ParametrizedLine() {}
/** \returns the dimension in which the line holds */
inline Index dim() const { return m_direction.size(); }
EIGEN_DEVICE_FUNC inline Index dim() const { return m_direction.size(); }
const VectorType& origin() const { return m_origin; }
VectorType& origin() { return m_origin; }
EIGEN_DEVICE_FUNC const VectorType& origin() const { return m_origin; }
EIGEN_DEVICE_FUNC VectorType& origin() { return m_origin; }
const VectorType& direction() const { return m_direction; }
VectorType& direction() { return m_direction; }
EIGEN_DEVICE_FUNC const VectorType& direction() const { return m_direction; }
EIGEN_DEVICE_FUNC VectorType& direction() { return m_direction; }
/** \returns the squared distance of a point \a p to its projection onto the line \c *this.
* \sa distance()
*/
RealScalar squaredDistance(const VectorType& p) const
EIGEN_DEVICE_FUNC RealScalar squaredDistance(const VectorType& p) const
{
VectorType diff = p - origin();
return (diff - direction().dot(diff) * direction()).squaredNorm();
@@ -87,22 +87,22 @@ public:
/** \returns the distance of a point \a p to its projection onto the line \c *this.
* \sa squaredDistance()
*/
RealScalar distance(const VectorType& p) const { using std::sqrt; return sqrt(squaredDistance(p)); }
EIGEN_DEVICE_FUNC RealScalar distance(const VectorType& p) const { EIGEN_USING_STD_MATH(sqrt) return sqrt(squaredDistance(p)); }
/** \returns the projection of a point \a p onto the line \c *this. */
VectorType projection(const VectorType& p) const
EIGEN_DEVICE_FUNC VectorType projection(const VectorType& p) const
{ return origin() + direction().dot(p-origin()) * direction(); }
VectorType pointAt(const Scalar& t) const;
EIGEN_DEVICE_FUNC VectorType pointAt(const Scalar& t) const;
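Aside (hedged sketch, not from the patch): Through() builds a line with a normalized direction, and distance(), projection() and pointAt() (declared just above) behave as follows.

#include <Eigen/Geometry>
#include <iostream>

int main()
{
  typedef Eigen::ParametrizedLine<float, 2> Line2;
  Line2 l = Line2::Through(Eigen::Vector2f(0.f, 0.f), Eigen::Vector2f(2.f, 0.f));  // the x axis
  Eigen::Vector2f p(3.f, 4.f);
  std::cout << l.distance(p) << "\n";                 // 4
  std::cout << l.projection(p).transpose() << "\n";   // 3 0
  std::cout << l.pointAt(5.f).transpose() << "\n";    // origin + 5 * direction = 5 0
}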
template <int OtherOptions>
Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
EIGEN_DEVICE_FUNC Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
template <int OtherOptions>
Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
EIGEN_DEVICE_FUNC Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
template <int OtherOptions>
VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
/** \returns \c *this with scalar type cast to \a NewScalarType
*
@@ -110,7 +110,7 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
inline typename internal::cast_return_type<ParametrizedLine,
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<ParametrizedLine,
ParametrizedLine<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const
{
return typename internal::cast_return_type<ParametrizedLine,
@@ -119,7 +119,7 @@ public:
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType,int OtherOptions>
inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
{
m_origin = other.origin().template cast<Scalar>();
m_direction = other.direction().template cast<Scalar>();
@@ -129,7 +129,7 @@ public:
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
EIGEN_DEVICE_FUNC bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }
protected:
@@ -143,7 +143,7 @@ protected:
*/
template <typename _Scalar, int _AmbientDim, int _Options>
template <int OtherOptions>
inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane)
EIGEN_DEVICE_FUNC inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
direction() = hyperplane.normal().unitOrthogonal();
@@ -153,7 +153,7 @@ inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const H
/** \returns the point at \a t along this line
*/
template <typename _Scalar, int _AmbientDim, int _Options>
inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const
{
return origin() + (direction()*t);
@@ -163,7 +163,7 @@ ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const
*/
template <typename _Scalar, int _AmbientDim, int _Options>
template <int OtherOptions>
inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
{
return -(hyperplane.offset()+hyperplane.normal().dot(origin()))
/ hyperplane.normal().dot(direction());
@@ -175,7 +175,7 @@ inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPara
*/
template <typename _Scalar, int _AmbientDim, int _Options>
template <int OtherOptions>
inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
{
return intersectionParameter(hyperplane);
}
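Aside (not from the patch): a hedged sketch of a line/hyperplane intersection query using intersectionParameter() and intersectionPoint().

#include <Eigen/Geometry>
#include <iostream>

int main()
{
  Eigen::ParametrizedLine<float, 2> ray(Eigen::Vector2f(0.f, 1.f), Eigen::Vector2f(1.f, 0.f));
  Eigen::Hyperplane<float, 2> wall(Eigen::Vector2f(1.f, 0.f), -3.f);           // the vertical line x = 3
  float t = ray.intersectionParameter(wall);                                   // t = 3
  std::cout << t << "  " << ray.intersectionPoint(wall).transpose() << "\n";   // 3  3 1
}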
@@ -184,7 +184,7 @@ inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(con
*/
template <typename _Scalar, int _AmbientDim, int _Options>
template <int OtherOptions>
inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
{
return pointAt(intersectionParameter(hyperplane));

View File

@@ -58,37 +58,37 @@ class QuaternionBase : public RotationBase<Derived, 3>
/** \returns the \c x coefficient */
inline Scalar x() const { return this->derived().coeffs().coeff(0); }
EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); }
/** \returns the \c y coefficient */
inline Scalar y() const { return this->derived().coeffs().coeff(1); }
EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); }
/** \returns the \c z coefficient */
inline Scalar z() const { return this->derived().coeffs().coeff(2); }
EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); }
/** \returns the \c w coefficient */
inline Scalar w() const { return this->derived().coeffs().coeff(3); }
EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); }
/** \returns a reference to the \c x coefficient */
inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
/** \returns a reference to the \c y coefficient */
inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
/** \returns a reference to the \c z coefficient */
inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
/** \returns a reference to the \c w coefficient */
inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
/** \returns a read-only vector expression of the imaginary part (x,y,z) */
inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
/** \returns a vector expression of the imaginary part (x,y,z) */
inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
EIGEN_DEVICE_FUNC inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
/** \returns a read-only vector expression of the coefficients (x,y,z,w) */
inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
EIGEN_DEVICE_FUNC inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
/** \returns a vector expression of the coefficients (x,y,z,w) */
inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);
template<class OtherDerived> EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);
template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);
// disabled this copy operator as it is giving very strange compilation errors when compiling
// test_stdvector with GCC 4.4.2. This looks like a GCC bug though, so feel free to re-enable it if it's
@@ -97,72 +97,72 @@ class QuaternionBase : public RotationBase<Derived, 3>
// Derived& operator=(const QuaternionBase& other)
// { return operator=<Derived>(other); }
Derived& operator=(const AngleAxisType& aa);
template<class OtherDerived> Derived& operator=(const MatrixBase<OtherDerived>& m);
EIGEN_DEVICE_FUNC Derived& operator=(const AngleAxisType& aa);
template<class OtherDerived> EIGEN_DEVICE_FUNC Derived& operator=(const MatrixBase<OtherDerived>& m);
/** \returns a quaternion representing an identity rotation
* \sa MatrixBase::Identity()
*/
static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
EIGEN_DEVICE_FUNC static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
/** \sa QuaternionBase::Identity(), MatrixBase::setIdentity()
*/
inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }
EIGEN_DEVICE_FUNC inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }
/** \returns the squared norm of the quaternion's coefficients
* \sa QuaternionBase::norm(), MatrixBase::squaredNorm()
*/
inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
EIGEN_DEVICE_FUNC inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
/** \returns the norm of the quaternion's coefficients
* \sa QuaternionBase::squaredNorm(), MatrixBase::norm()
*/
inline Scalar norm() const { return coeffs().norm(); }
EIGEN_DEVICE_FUNC inline Scalar norm() const { return coeffs().norm(); }
/** Normalizes the quaternion \c *this
* \sa normalized(), MatrixBase::normalize() */
inline void normalize() { coeffs().normalize(); }
EIGEN_DEVICE_FUNC inline void normalize() { coeffs().normalize(); }
/** \returns a normalized copy of \c *this
* \sa normalize(), MatrixBase::normalized() */
inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
EIGEN_DEVICE_FUNC inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
/** \returns the dot product of \c *this and \a other
* Geometrically speaking, the dot product of two unit quaternions
* corresponds to the cosine of half the angle between the two rotations.
* \sa angularDistance()
*/
template<class OtherDerived> inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
template<class OtherDerived> EIGEN_DEVICE_FUNC inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
template<class OtherDerived> Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
template<class OtherDerived> EIGEN_DEVICE_FUNC Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
/** \returns an equivalent 3x3 rotation matrix */
Matrix3 toRotationMatrix() const;
EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix() const;
/** \returns the quaternion which transforms \a a into \a b through a rotation */
template<typename Derived1, typename Derived2>
Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
EIGEN_DEVICE_FUNC Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
template<class OtherDerived> EIGEN_STRONG_INLINE Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
template<class OtherDerived> EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);
template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);
/** \returns the quaternion describing the inverse rotation */
Quaternion<Scalar> inverse() const;
EIGEN_DEVICE_FUNC Quaternion<Scalar> inverse() const;
/** \returns the conjugated quaternion */
Quaternion<Scalar> conjugate() const;
EIGEN_DEVICE_FUNC Quaternion<Scalar> conjugate() const;
template<class OtherDerived> Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
template<class OtherDerived> EIGEN_DEVICE_FUNC Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
template<class OtherDerived>
bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
EIGEN_DEVICE_FUNC bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
{ return coeffs().isApprox(other.coeffs(), prec); }
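Aside (hedged usage sketch, not from the patch): dot(), angularDistance() and slerp() declared above, plus Quaternion::FromTwoVectors() from further down in this file.

#include <Eigen/Geometry>
#include <iostream>

int main()
{
  Eigen::Quaternionf a = Eigen::Quaternionf::Identity();
  Eigen::Quaternionf b(Eigen::AngleAxisf(1.0f, Eigen::Vector3f::UnitZ()));
  std::cout << a.angularDistance(b) << "\n";        // ~1 radian
  Eigen::Quaternionf mid = a.slerp(0.5f, b);        // halfway between a and b
  Eigen::Vector3f ex = Eigen::Vector3f::UnitX();
  std::cout << (mid * ex).transpose() << "\n";      // ex rotated by ~0.5 rad about z
  Eigen::Quaternionf r = Eigen::Quaternionf::FromTwoVectors(ex, Eigen::Vector3f::UnitY());
  std::cout << (r * ex).transpose() << "\n";        // approximately 0 1 0
}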
/** \returns the result of applying the rotation to the vector \a v */
EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
/** \returns \c *this with scalar type cast to \a NewScalarType
*
@@ -170,7 +170,7 @@ class QuaternionBase : public RotationBase<Derived, 3>
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
{
return typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type(derived());
}
@@ -239,7 +239,7 @@ public:
typedef typename Base::AngleAxisType AngleAxisType;
/** Default constructor leaving the quaternion uninitialized. */
inline Quaternion() {}
EIGEN_DEVICE_FUNC inline Quaternion() {}
/** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from
* its four coefficients \a w, \a x, \a y and \a z.
@@ -248,36 +248,36 @@ public:
* while internally the coefficients are stored in the following order:
* [\c x, \c y, \c z, \c w]
*/
inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
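The ordering difference noted above is a common pitfall; a tiny sketch (not part of the patch):

#include <Eigen/Geometry>
#include <iostream>

int main() {
  Eigen::Quaterniond q(1.0, 0.0, 0.0, 0.0);      // constructor order: w, x, y, z (identity)
  std::cout << q.coeffs().transpose() << "\n";   // storage order: x y z w  ->  prints "0 0 0 1"
}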
/** Constructs and initializes a quaternion from the array data */
explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
EIGEN_DEVICE_FUNC explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
/** Copy constructor */
template<class Derived> EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
template<class Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
/** Constructs and initializes a quaternion from the angle-axis \a aa */
explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
EIGEN_DEVICE_FUNC explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
/** Constructs and initializes a quaternion from either:
* - a rotation matrix expression,
* - a 4D vector expression representing quaternion coefficients.
*/
template<typename Derived>
explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
EIGEN_DEVICE_FUNC explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
/** Explicit copy constructor with scalar conversion */
template<typename OtherScalar, int OtherOptions>
explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
EIGEN_DEVICE_FUNC explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
{ m_coeffs = other.coeffs().template cast<Scalar>(); }
static Quaternion UnitRandom();
EIGEN_DEVICE_FUNC static Quaternion UnitRandom();
template<typename Derived1, typename Derived2>
static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
EIGEN_DEVICE_FUNC static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
inline Coefficients& coeffs() { return m_coeffs;}
inline const Coefficients& coeffs() const { return m_coeffs;}
EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs;}
EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment))
@@ -357,9 +357,9 @@ class Map<const Quaternion<_Scalar>, _Options >
* \code *coeffs == {x, y, z, w} \endcode
*
* If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
inline const Coefficients& coeffs() const { return m_coeffs;}
EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
protected:
const Coefficients m_coeffs;
@@ -394,10 +394,10 @@ class Map<Quaternion<_Scalar>, _Options >
* \code *coeffs == {x, y, z, w} \endcode
*
* If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
inline Coefficients& coeffs() { return m_coeffs; }
inline const Coefficients& coeffs() const { return m_coeffs; }
EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
protected:
Coefficients m_coeffs;
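A hypothetical sketch of mapping an external buffer with the documented {x, y, z, w} layout (the buffer name is illustrative; the unaligned Map variant is used):

#include <Eigen/Geometry>

int main() {
  double buf[4] = {0.0, 0.0, 0.0, 1.0};            // x, y, z, w -> identity rotation
  Eigen::Map<Eigen::Quaterniond> q(buf);           // no copy, reads and writes buf directly
  q = Eigen::Quaterniond(Eigen::AngleAxisd(1.2, Eigen::Vector3d::UnitZ()));
  return buf[3] == q.w() ? 0 : 1;                  // the new w lands in buf[3]
}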
@@ -425,7 +425,7 @@ typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd;
namespace internal {
template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product
{
static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
return Quaternion<Scalar>
(
a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
@@ -440,7 +440,7 @@ template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options
/** \returns the concatenation of two rotations as a quaternion-quaternion product */
template <class Derived>
template <class OtherDerived>
EIGEN_STRONG_INLINE Quaternion<typename internal::traits<Derived>::Scalar>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<typename internal::traits<Derived>::Scalar>
QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value),
@@ -453,7 +453,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
/** \sa operator*(Quaternion) */
template <class Derived>
template <class OtherDerived>
EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
{
derived() = derived() * other.derived();
return derived();
@@ -467,7 +467,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const Quaterni
* - Via a Matrix3: 24 + 15n
*/
template <class Derived>
EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
QuaternionBase<Derived>::_transformVector(const Vector3& v) const
{
// Note that this algorithm comes from the optimization by hand
@@ -481,7 +481,7 @@ QuaternionBase<Derived>::_transformVector(const Vector3& v) const
}
template<class Derived>
EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<Derived>& other)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<Derived>& other)
{
coeffs() = other.coeffs();
return derived();
@@ -489,7 +489,7 @@ EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(
template<class Derived>
template<class OtherDerived>
EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
{
coeffs() = other.coeffs();
return derived();
@@ -498,10 +498,10 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const Quaternion
/** Sets \c *this from an angle-axis \a aa and returns a reference to \c *this
*/
template<class Derived>
EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
{
using std::cos;
using std::sin;
EIGEN_USING_STD_MATH(cos)
EIGEN_USING_STD_MATH(sin)
Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings
this->w() = cos(ha);
this->vec() = sin(ha) * aa.axis();
@@ -516,7 +516,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisT
template<class Derived>
template<class MatrixDerived>
inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename MatrixDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
@@ -528,7 +528,7 @@ inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerive
* be normalized, otherwise the result is undefined.
*/
template<class Derived>
inline typename QuaternionBase<Derived>::Matrix3
EIGEN_DEVICE_FUNC inline typename QuaternionBase<Derived>::Matrix3
QuaternionBase<Derived>::toRotationMatrix(void) const
{
// NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!)
@@ -575,9 +575,9 @@ QuaternionBase<Derived>::toRotationMatrix(void) const
*/
template<class Derived>
template<typename Derived1, typename Derived2>
inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
{
using std::sqrt;
EIGEN_USING_STD_MATH(sqrt)
Vector3 v0 = a.normalized();
Vector3 v1 = b.normalized();
Scalar c = v1.dot(v0);
@@ -616,11 +616,11 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
* \note The implementation is based on http://planning.cs.uiuc.edu/node198.html
*/
template<typename Scalar, int Options>
Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
EIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
{
using std::sqrt;
using std::sin;
using std::cos;
EIGEN_USING_STD_MATH(sqrt)
EIGEN_USING_STD_MATH(sin)
EIGEN_USING_STD_MATH(cos)
const Scalar u1 = internal::random<Scalar>(0, 1),
u2 = internal::random<Scalar>(0, 2*EIGEN_PI),
u3 = internal::random<Scalar>(0, 2*EIGEN_PI);
@@ -642,7 +642,7 @@ Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
*/
template<typename Scalar, int Options>
template<typename Derived1, typename Derived2>
Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
EIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
{
Quaternion quat;
quat.setFromTwoVectors(a, b);
@@ -657,7 +657,7 @@ Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const Matr
* \sa QuaternionBase::conjugate()
*/
template <class Derived>
inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
{
// FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ??
Scalar n2 = this->squaredNorm();
@@ -674,7 +674,7 @@ inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Der
namespace internal {
template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj
{
static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
}
};
@@ -687,7 +687,7 @@ template<int Arch, class Derived, typename Scalar, int _Options> struct quat_con
* \sa Quaternion2::inverse()
*/
template <class Derived>
inline Quaternion<typename internal::traits<Derived>::Scalar>
EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
QuaternionBase<Derived>::conjugate() const
{
return internal::quat_conj<Architecture::Target, Derived,
@@ -701,13 +701,12 @@ QuaternionBase<Derived>::conjugate() const
*/
template <class Derived>
template <class OtherDerived>
inline typename internal::traits<Derived>::Scalar
EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar
QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
{
using std::atan2;
using std::abs;
EIGEN_USING_STD_MATH(atan2)
Quaternion<Scalar> d = (*this) * other.conjugate();
return Scalar(2) * atan2( d.vec().norm(), abs(d.w()) );
return Scalar(2) * atan2( d.vec().norm(), numext::abs(d.w()) );
}
@@ -720,15 +719,14 @@ QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& oth
*/
template <class Derived>
template <class OtherDerived>
Quaternion<typename internal::traits<Derived>::Scalar>
EIGEN_DEVICE_FUNC Quaternion<typename internal::traits<Derived>::Scalar>
QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const
{
using std::acos;
using std::sin;
using std::abs;
EIGEN_USING_STD_MATH(acos)
EIGEN_USING_STD_MATH(sin)
const Scalar one = Scalar(1) - NumTraits<Scalar>::epsilon();
Scalar d = this->dot(other);
Scalar absD = abs(d);
Scalar absD = numext::abs(d);
Scalar scale0;
Scalar scale1;
@@ -759,10 +757,10 @@ template<typename Other>
struct quaternionbase_assign_impl<Other,3,3>
{
typedef typename Other::Scalar Scalar;
template<class Derived> static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)
template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)
{
const typename internal::nested_eval<Other,2>::type mat(a_mat);
using std::sqrt;
EIGEN_USING_STD_MATH(sqrt)
// This algorithm comes from "Quaternion Calculus and Fast Animation",
// Ken Shoemake, 1987 SIGGRAPH course notes
Scalar t = mat.trace();
@@ -800,7 +798,7 @@ template<typename Other>
struct quaternionbase_assign_impl<Other,4,1>
{
typedef typename Other::Scalar Scalar;
template<class Derived> static inline void run(QuaternionBase<Derived>& q, const Other& vec)
template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& vec)
{
q.coeffs() = vec;
}


@@ -59,35 +59,35 @@ protected:
public:
/** Constructs a 2D counter-clockwise rotation from the angle \a a in radians. */
explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}
EIGEN_DEVICE_FUNC explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}
/** Default constructor without initialization. The represented rotation is undefined. */
Rotation2D() {}
EIGEN_DEVICE_FUNC Rotation2D() {}
/** Construct a 2D rotation from a 2x2 rotation matrix \a mat.
*
* \sa fromRotationMatrix()
*/
template<typename Derived>
explicit Rotation2D(const MatrixBase<Derived>& m)
EIGEN_DEVICE_FUNC explicit Rotation2D(const MatrixBase<Derived>& m)
{
fromRotationMatrix(m.derived());
}
/** \returns the rotation angle */
inline Scalar angle() const { return m_angle; }
EIGEN_DEVICE_FUNC inline Scalar angle() const { return m_angle; }
/** \returns a read-write reference to the rotation angle */
inline Scalar& angle() { return m_angle; }
EIGEN_DEVICE_FUNC inline Scalar& angle() { return m_angle; }
/** \returns the rotation angle in [0,2pi] */
inline Scalar smallestPositiveAngle() const {
EIGEN_DEVICE_FUNC inline Scalar smallestPositiveAngle() const {
Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
return tmp<Scalar(0) ? tmp + Scalar(2*EIGEN_PI) : tmp;
}
/** \returns the rotation angle in [-pi,pi] */
inline Scalar smallestAngle() const {
EIGEN_DEVICE_FUNC inline Scalar smallestAngle() const {
Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
if(tmp>Scalar(EIGEN_PI)) tmp -= Scalar(2*EIGEN_PI);
else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2*EIGEN_PI);
@@ -95,23 +95,23 @@ public:
}
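A tiny sketch of the two range-reduction helpers above (illustrative):

#include <Eigen/Geometry>
#include <iostream>

int main() {
  Eigen::Rotation2Dd r(7.0);                       // more than a full turn
  std::cout << r.smallestPositiveAngle() << "\n";  // ~0.7168, reduced into [0, 2*pi)
  std::cout << r.smallestAngle() << "\n";          // ~0.7168, reduced into [-pi, pi]
}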
/** \returns the inverse rotation */
inline Rotation2D inverse() const { return Rotation2D(-m_angle); }
EIGEN_DEVICE_FUNC inline Rotation2D inverse() const { return Rotation2D(-m_angle); }
/** Concatenates two rotations */
inline Rotation2D operator*(const Rotation2D& other) const
EIGEN_DEVICE_FUNC inline Rotation2D operator*(const Rotation2D& other) const
{ return Rotation2D(m_angle + other.m_angle); }
/** Concatenates two rotations */
inline Rotation2D& operator*=(const Rotation2D& other)
EIGEN_DEVICE_FUNC inline Rotation2D& operator*=(const Rotation2D& other)
{ m_angle += other.m_angle; return *this; }
/** Applies the rotation to a 2D vector */
Vector2 operator* (const Vector2& vec) const
EIGEN_DEVICE_FUNC Vector2 operator* (const Vector2& vec) const
{ return toRotationMatrix() * vec; }
template<typename Derived>
Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
Matrix2 toRotationMatrix() const;
EIGEN_DEVICE_FUNC Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
EIGEN_DEVICE_FUNC Matrix2 toRotationMatrix() const;
/** Sets \c *this from a 2x2 rotation matrix \a mat.
* In other words, this function extracts the rotation angle from the rotation matrix.
@@ -121,13 +121,13 @@ public:
* \sa fromRotationMatrix()
*/
template<typename Derived>
Rotation2D& operator=(const MatrixBase<Derived>& m)
EIGEN_DEVICE_FUNC Rotation2D& operator=(const MatrixBase<Derived>& m)
{ return fromRotationMatrix(m.derived()); }
/** \returns the spherical interpolation between \c *this and \a other using
* parameter \a t. It is in fact equivalent to a linear interpolation.
*/
inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const
EIGEN_DEVICE_FUNC inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const
{
Scalar dist = Rotation2D(other.m_angle-m_angle).smallestAngle();
return Rotation2D(m_angle + dist*t);
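Since the 2D case reduces to interpolating the angle along the shortest arc, the following sketch (illustrative) yields the expected midpoint:

#include <Eigen/Geometry>

int main() {
  Eigen::Rotation2Dd a(0.1), b(0.7);
  Eigen::Rotation2Dd mid = a.slerp(0.5, b);        // angle() == 0.4, halfway along the shortest arc
  return mid.angle() > 0.0 ? 0 : 1;
}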
@@ -139,23 +139,23 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
inline typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const
{ return typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type(*this); }
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)
EIGEN_DEVICE_FUNC inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)
{
m_angle = Scalar(other.angle());
}
static inline Rotation2D Identity() { return Rotation2D(0); }
EIGEN_DEVICE_FUNC static inline Rotation2D Identity() { return Rotation2D(0); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
bool isApprox(const Rotation2D& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
EIGEN_DEVICE_FUNC bool isApprox(const Rotation2D& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return internal::isApprox(m_angle,other.m_angle, prec); }
};
@@ -173,9 +173,9 @@ typedef Rotation2D<double> Rotation2Dd;
*/
template<typename Scalar>
template<typename Derived>
Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
EIGEN_DEVICE_FUNC Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
{
using std::atan2;
EIGEN_USING_STD_MATH(atan2)
EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
m_angle = atan2(mat.coeff(1,0), mat.coeff(0,0));
return *this;
@@ -185,10 +185,10 @@ Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Deri
*/
template<typename Scalar>
typename Rotation2D<Scalar>::Matrix2
Rotation2D<Scalar>::toRotationMatrix(void) const
EIGEN_DEVICE_FUNC Rotation2D<Scalar>::toRotationMatrix(void) const
{
using std::sin;
using std::cos;
EIGEN_USING_STD_MATH(sin)
EIGEN_USING_STD_MATH(cos)
Scalar sinA = sin(m_angle);
Scalar cosA = cos(m_angle);
return (Matrix2() << cosA, -sinA, sinA, cosA).finished();


@@ -38,26 +38,26 @@ class RotationBase
typedef Matrix<Scalar,Dim,1> VectorType;
public:
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
inline Derived& derived() { return *static_cast<Derived*>(this); }
EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast<Derived*>(this); }
/** \returns an equivalent rotation matrix */
inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
EIGEN_DEVICE_FUNC inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
/** \returns an equivalent rotation matrix
* This function is added to conform with the Transform class' naming scheme.
*/
inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
EIGEN_DEVICE_FUNC inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
/** \returns the inverse rotation */
inline Derived inverse() const { return derived().inverse(); }
EIGEN_DEVICE_FUNC inline Derived inverse() const { return derived().inverse(); }
/** \returns the concatenation of the rotation \c *this with a translation \a t */
inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
{ return Transform<Scalar,Dim,Isometry>(*this) * t; }
/** \returns the concatenation of the rotation \c *this with a uniform scaling \a s */
inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
{ return toRotationMatrix() * s.factor(); }
/** \returns the concatenation of the rotation \c *this with a generic expression \a e
@@ -67,17 +67,17 @@ class RotationBase
* - a vector of size Dim
*/
template<typename OtherDerived>
EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
operator*(const EigenBase<OtherDerived>& e) const
{ return internal::rotation_base_generic_product_selector<Derived,OtherDerived>::run(derived(), e.derived()); }
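A sketch of the two operand kinds listed above, a vector of size Dim and a Dim x Dim matrix (illustrative):

#include <Eigen/Geometry>

int main() {
  using namespace Eigen;
  AngleAxisd r(0.5, Vector3d::UnitZ());
  Vector3d v = r * Vector3d::UnitX();              // rotates the vector
  Matrix3d M = r * Matrix3d::Identity();           // rotation matrix times a 3x3 matrix
  return (M * Vector3d::UnitX()).isApprox(v) ? 0 : 1;
}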
/** \returns the concatenation of a linear transformation \a l with the rotation \a r */
template<typename OtherDerived> friend
inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
{ return l.derived() * r.toRotationMatrix(); }
/** \returns the concatenation of a scaling \a l with the rotation \a r */
friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
EIGEN_DEVICE_FUNC friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
{
Transform<Scalar,Dim,Affine> res(r);
res.linear().applyOnTheLeft(l);
@@ -86,11 +86,11 @@ class RotationBase
/** \returns the concatenation of the rotation \c *this with a transformation \a t */
template<int Mode, int Options>
inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode,Options>& t) const
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode,Options>& t) const
{ return toRotationMatrix() * t; }
template<typename OtherVectorType>
inline VectorType _transformVector(const OtherVectorType& v) const
EIGEN_DEVICE_FUNC inline VectorType _transformVector(const OtherVectorType& v) const
{ return toRotationMatrix() * v; }
};
@@ -102,7 +102,7 @@ struct rotation_base_generic_product_selector<RotationDerived,MatrixType,false>
{
enum { Dim = RotationDerived::Dim };
typedef Matrix<typename RotationDerived::Scalar,Dim,Dim> ReturnType;
static inline ReturnType run(const RotationDerived& r, const MatrixType& m)
EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const MatrixType& m)
{ return r.toRotationMatrix() * m; }
};
@@ -110,7 +110,7 @@ template<typename RotationDerived, typename Scalar, int Dim, int MaxDim>
struct rotation_base_generic_product_selector< RotationDerived, DiagonalMatrix<Scalar,Dim,MaxDim>, false >
{
typedef Transform<Scalar,Dim,Affine> ReturnType;
static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
{
ReturnType res(r);
res.linear() *= m;
@@ -123,7 +123,7 @@ struct rotation_base_generic_product_selector<RotationDerived,OtherVectorType,tr
{
enum { Dim = RotationDerived::Dim };
typedef Matrix<typename RotationDerived::Scalar,Dim,1> ReturnType;
static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)
{
return r._transformVector(v);
}
@@ -137,7 +137,7 @@ struct rotation_base_generic_product_selector<RotationDerived,OtherVectorType,tr
*/
template<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>
template<typename OtherDerived>
Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
::Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
{
EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim))
@@ -150,7 +150,7 @@ Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
*/
template<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>
template<typename OtherDerived>
Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&
EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&
Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
::operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
{
@@ -179,20 +179,20 @@ namespace internal {
* \sa class Transform, class Rotation2D, class Quaternion, class AngleAxis
*/
template<typename Scalar, int Dim>
static inline Matrix<Scalar,2,2> toRotationMatrix(const Scalar& s)
EIGEN_DEVICE_FUNC static inline Matrix<Scalar,2,2> toRotationMatrix(const Scalar& s)
{
EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
return Rotation2D<Scalar>(s).toRotationMatrix();
}
template<typename Scalar, int Dim, typename OtherDerived>
static inline Matrix<Scalar,Dim,Dim> toRotationMatrix(const RotationBase<OtherDerived,Dim>& r)
EIGEN_DEVICE_FUNC static inline Matrix<Scalar,Dim,Dim> toRotationMatrix(const RotationBase<OtherDerived,Dim>& r)
{
return r.toRotationMatrix();
}
template<typename Scalar, int Dim, typename OtherDerived>
static inline const MatrixBase<OtherDerived>& toRotationMatrix(const MatrixBase<OtherDerived>& mat)
EIGEN_DEVICE_FUNC static inline const MatrixBase<OtherDerived>& toRotationMatrix(const MatrixBase<OtherDerived>& mat)
{
EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim,
YOU_MADE_A_PROGRAMMING_MISTAKE)

Eigen/src/Geometry/Scaling.h Normal file → Executable file

@@ -118,28 +118,28 @@ operator*(const MatrixBase<Derived>& matrix, const UniformScaling<Scalar>& s)
{ return matrix.derived() * s.factor(); }
/** Constructs a uniform scaling from scale factor \a s */
static inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
/** Constructs a uniform scaling from scale factor \a s */
static inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
/** Constructs a uniform scaling from scale factor \a s */
template<typename RealScalar>
static inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)
inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)
{ return UniformScaling<std::complex<RealScalar> >(s); }
/** Constructs a 2D axis aligned scaling */
template<typename Scalar>
static inline DiagonalMatrix<Scalar,2> Scaling(const Scalar& sx, const Scalar& sy)
inline DiagonalMatrix<Scalar,2> Scaling(const Scalar& sx, const Scalar& sy)
{ return DiagonalMatrix<Scalar,2>(sx, sy); }
/** Constructs a 3D axis aligned scaling */
template<typename Scalar>
static inline DiagonalMatrix<Scalar,3> Scaling(const Scalar& sx, const Scalar& sy, const Scalar& sz)
inline DiagonalMatrix<Scalar,3> Scaling(const Scalar& sx, const Scalar& sy, const Scalar& sz)
{ return DiagonalMatrix<Scalar,3>(sx, sy, sz); }
/** Constructs an axis aligned scaling expression from vector expression \a coeffs
* This is an alias for coeffs.asDiagonal()
*/
template<typename Derived>
static inline const DiagonalWrapper<const Derived> Scaling(const MatrixBase<Derived>& coeffs)
inline const DiagonalWrapper<const Derived> Scaling(const MatrixBase<Derived>& coeffs)
{ return coeffs.asDiagonal(); }
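These free functions are typically combined with translations and rotations to build a Transform; a small stand-alone sketch (not part of the patch):

#include <Eigen/Geometry>

int main() {
  using namespace Eigen;
  Affine3d t = Translation3d(1.0, 0.0, 0.0) * Scaling(2.0);   // scale uniformly by 2, then translate along x
  Vector3d p = t * Vector3d::Ones();                          // (3, 2, 2)
  Vector3d q = Scaling(1.0, 2.0, 3.0) * Vector3d::Ones();     // axis-aligned scaling: (1, 2, 3)
  return p.isApprox(Vector3d(3.0, 2.0, 2.0)) && q.isApprox(Vector3d(1.0, 2.0, 3.0)) ? 0 : 1;
}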
/** \deprecated */


@@ -253,43 +253,43 @@ public:
/** Default constructor without initialization of the meaningful coefficients.
* If Mode==Affine, then the last row is set to [0 ... 0 1] */
inline Transform()
EIGEN_DEVICE_FUNC inline Transform()
{
check_template_params();
internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix);
}
inline Transform(const Transform& other)
EIGEN_DEVICE_FUNC inline Transform(const Transform& other)
{
check_template_params();
m_matrix = other.m_matrix;
}
inline explicit Transform(const TranslationType& t)
EIGEN_DEVICE_FUNC inline explicit Transform(const TranslationType& t)
{
check_template_params();
*this = t;
}
inline explicit Transform(const UniformScaling<Scalar>& s)
EIGEN_DEVICE_FUNC inline explicit Transform(const UniformScaling<Scalar>& s)
{
check_template_params();
*this = s;
}
template<typename Derived>
inline explicit Transform(const RotationBase<Derived, Dim>& r)
EIGEN_DEVICE_FUNC inline explicit Transform(const RotationBase<Derived, Dim>& r)
{
check_template_params();
*this = r;
}
inline Transform& operator=(const Transform& other)
EIGEN_DEVICE_FUNC inline Transform& operator=(const Transform& other)
{ m_matrix = other.m_matrix; return *this; }
typedef internal::transform_take_affine_part<Transform> take_affine_part;
/** Constructs and initializes a transformation from a Dim^2 or a (Dim+1)^2 matrix. */
template<typename OtherDerived>
inline explicit Transform(const EigenBase<OtherDerived>& other)
EIGEN_DEVICE_FUNC inline explicit Transform(const EigenBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
@@ -300,7 +300,7 @@ public:
/** Set \c *this from a Dim^2 or (Dim+1)^2 matrix. */
template<typename OtherDerived>
inline Transform& operator=(const EigenBase<OtherDerived>& other)
EIGEN_DEVICE_FUNC inline Transform& operator=(const EigenBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
@@ -310,7 +310,7 @@ public:
}
template<int OtherOptions>
inline Transform(const Transform<Scalar,Dim,Mode,OtherOptions>& other)
EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,Mode,OtherOptions>& other)
{
check_template_params();
// only the options change, we can directly copy the matrices
@@ -318,7 +318,7 @@ public:
}
template<int OtherMode,int OtherOptions>
inline Transform(const Transform<Scalar,Dim,OtherMode,OtherOptions>& other)
EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,OtherMode,OtherOptions>& other)
{
check_template_params();
// prevent conversions as:
@@ -359,14 +359,14 @@ public:
}
template<typename OtherDerived>
Transform(const ReturnByValue<OtherDerived>& other)
EIGEN_DEVICE_FUNC Transform(const ReturnByValue<OtherDerived>& other)
{
check_template_params();
other.evalTo(*this);
}
template<typename OtherDerived>
Transform& operator=(const ReturnByValue<OtherDerived>& other)
EIGEN_DEVICE_FUNC Transform& operator=(const ReturnByValue<OtherDerived>& other)
{
other.evalTo(*this);
return *this;
@@ -381,35 +381,35 @@ public:
inline QTransform toQTransform(void) const;
#endif
Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
Index cols() const { return m_matrix.cols(); }
EIGEN_DEVICE_FUNC Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
EIGEN_DEVICE_FUNC Index cols() const { return m_matrix.cols(); }
/** shortcut for m_matrix(row,col);
* \sa MatrixBase::operator(Index,Index) const */
inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
EIGEN_DEVICE_FUNC inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
/** shortcut for m_matrix(row,col);
* \sa MatrixBase::operator(Index,Index) */
inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
EIGEN_DEVICE_FUNC inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
/** \returns a read-only expression of the transformation matrix */
inline const MatrixType& matrix() const { return m_matrix; }
EIGEN_DEVICE_FUNC inline const MatrixType& matrix() const { return m_matrix; }
/** \returns a writable expression of the transformation matrix */
inline MatrixType& matrix() { return m_matrix; }
EIGEN_DEVICE_FUNC inline MatrixType& matrix() { return m_matrix; }
/** \returns a read-only expression of the linear part of the transformation */
inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); }
EIGEN_DEVICE_FUNC inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); }
/** \returns a writable expression of the linear part of the transformation */
inline LinearPart linear() { return LinearPart(m_matrix,0,0); }
EIGEN_DEVICE_FUNC inline LinearPart linear() { return LinearPart(m_matrix,0,0); }
/** \returns a read-only expression of the Dim x HDim affine part of the transformation */
inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); }
EIGEN_DEVICE_FUNC inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); }
/** \returns a writable expression of the Dim x HDim affine part of the transformation */
inline AffinePart affine() { return take_affine_part::run(m_matrix); }
EIGEN_DEVICE_FUNC inline AffinePart affine() { return take_affine_part::run(m_matrix); }
/** \returns a read-only expression of the translation vector of the transformation */
inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); }
EIGEN_DEVICE_FUNC inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); }
/** \returns a writable expression of the translation vector of the transformation */
inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }
EIGEN_DEVICE_FUNC inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }
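A sketch of assembling a transform in place through the writable accessors above (illustrative):

#include <Eigen/Geometry>
#include <iostream>

int main() {
  using namespace Eigen;
  Affine3d t = Affine3d::Identity();
  t.linear() = AngleAxisd(0.5, Vector3d::UnitY()).toRotationMatrix();  // writable linear part
  t.translation() = Vector3d(1.0, 2.0, 3.0);                           // writable translation part
  std::cout << t.matrix() << "\n";                                     // full homogeneous matrix
}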
/** \returns an expression of the product between the transform \c *this and a matrix expression \a other.
*
@@ -437,7 +437,7 @@ public:
*/
// note: this function is defined here because some compilers cannot find the respective declaration
template<typename OtherDerived>
EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
operator * (const EigenBase<OtherDerived> &other) const
{ return internal::transform_right_product_impl<Transform, OtherDerived>::run(*this,other.derived()); }
@@ -449,7 +449,7 @@ public:
* \li a general transformation matrix of size Dim+1 x Dim+1.
*/
template<typename OtherDerived> friend
inline const typename internal::transform_left_product_impl<OtherDerived,Mode,Options,_Dim,_Dim+1>::ResultType
EIGEN_DEVICE_FUNC inline const typename internal::transform_left_product_impl<OtherDerived,Mode,Options,_Dim,_Dim+1>::ResultType
operator * (const EigenBase<OtherDerived> &a, const Transform &b)
{ return internal::transform_left_product_impl<OtherDerived,Mode,Options,Dim,HDim>::run(a.derived(),b); }
@@ -460,11 +460,11 @@ public:
* mode is not an isometry. In that case, the returned transform is an affinity.
*/
template<typename DiagonalDerived>
inline const TransformTimeDiagonalReturnType
EIGEN_DEVICE_FUNC inline const TransformTimeDiagonalReturnType
operator * (const DiagonalBase<DiagonalDerived> &b) const
{
TransformTimeDiagonalReturnType res(*this);
res.linear() *= b;
res.linearExt() *= b;
return res;
}
@@ -475,7 +475,7 @@ public:
* mode is no isometry. In that case, the returned transform is an affinity.
*/
template<typename DiagonalDerived>
friend inline TransformTimeDiagonalReturnType
EIGEN_DEVICE_FUNC friend inline TransformTimeDiagonalReturnType
operator * (const DiagonalBase<DiagonalDerived> &a, const Transform &b)
{
TransformTimeDiagonalReturnType res;
@@ -487,10 +487,10 @@ public:
}
template<typename OtherDerived>
inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
EIGEN_DEVICE_FUNC inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
/** Concatenates two transformations */
inline const Transform operator * (const Transform& other) const
EIGEN_DEVICE_FUNC inline const Transform operator * (const Transform& other) const
{
return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other);
}
@@ -522,7 +522,7 @@ public:
#else
/** Concatenates two different transformations */
template<int OtherMode,int OtherOptions>
inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
EIGEN_DEVICE_FUNC inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
{
return internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::run(*this,other);
@@ -530,47 +530,61 @@ public:
#endif
/** \sa MatrixBase::setIdentity() */
void setIdentity() { m_matrix.setIdentity(); }
EIGEN_DEVICE_FUNC void setIdentity() { m_matrix.setIdentity(); }
/**
* \brief Returns an identity transformation.
* \todo In the future this function should be returning a Transform expression.
*/
static const Transform Identity()
EIGEN_DEVICE_FUNC static const Transform Identity()
{
return Transform(MatrixType::Identity());
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline Transform& scale(const MatrixBase<OtherDerived> &other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline Transform& prescale(const MatrixBase<OtherDerived> &other);
inline Transform& scale(const Scalar& s);
inline Transform& prescale(const Scalar& s);
EIGEN_DEVICE_FUNC inline Transform& scale(const Scalar& s);
EIGEN_DEVICE_FUNC inline Transform& prescale(const Scalar& s);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline Transform& translate(const MatrixBase<OtherDerived> &other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline Transform& pretranslate(const MatrixBase<OtherDerived> &other);
template<typename RotationType>
EIGEN_DEVICE_FUNC
inline Transform& rotate(const RotationType& rotation);
template<typename RotationType>
EIGEN_DEVICE_FUNC
inline Transform& prerotate(const RotationType& rotation);
Transform& shear(const Scalar& sx, const Scalar& sy);
Transform& preshear(const Scalar& sx, const Scalar& sy);
EIGEN_DEVICE_FUNC Transform& shear(const Scalar& sx, const Scalar& sy);
EIGEN_DEVICE_FUNC Transform& preshear(const Scalar& sx, const Scalar& sy);
inline Transform& operator=(const TranslationType& t);
EIGEN_DEVICE_FUNC inline Transform& operator=(const TranslationType& t);
EIGEN_DEVICE_FUNC
inline Transform& operator*=(const TranslationType& t) { return translate(t.vector()); }
inline Transform operator*(const TranslationType& t) const;
EIGEN_DEVICE_FUNC inline Transform operator*(const TranslationType& t) const;
EIGEN_DEVICE_FUNC
inline Transform& operator=(const UniformScaling<Scalar>& t);
EIGEN_DEVICE_FUNC
inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }
EIGEN_DEVICE_FUNC
inline TransformTimeDiagonalReturnType operator*(const UniformScaling<Scalar>& s) const
{
TransformTimeDiagonalReturnType res = *this;
@@ -578,31 +592,36 @@ public:
return res;
}
inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linear() *= s; return *this; }
EIGEN_DEVICE_FUNC
inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linearExt() *= s; return *this; }
template<typename Derived>
inline Transform& operator=(const RotationBase<Derived,Dim>& r);
EIGEN_DEVICE_FUNC inline Transform& operator=(const RotationBase<Derived,Dim>& r);
template<typename Derived>
inline Transform& operator*=(const RotationBase<Derived,Dim>& r) { return rotate(r.toRotationMatrix()); }
EIGEN_DEVICE_FUNC inline Transform& operator*=(const RotationBase<Derived,Dim>& r) { return rotate(r.toRotationMatrix()); }
template<typename Derived>
inline Transform operator*(const RotationBase<Derived,Dim>& r) const;
EIGEN_DEVICE_FUNC inline Transform operator*(const RotationBase<Derived,Dim>& r) const;
const LinearMatrixType rotation() const;
EIGEN_DEVICE_FUNC const LinearMatrixType rotation() const;
template<typename RotationMatrixType, typename ScalingMatrixType>
EIGEN_DEVICE_FUNC
void computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const;
template<typename ScalingMatrixType, typename RotationMatrixType>
EIGEN_DEVICE_FUNC
void computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const;
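rotation() and the two compute*() members decompose the linear part with an SVD; a sketch of the contract they satisfy (illustrative, assuming a non-degenerate linear part):

#include <Eigen/Geometry>

int main() {
  using namespace Eigen;
  Affine3d t = Translation3d(0.0, 0.0, 1.0)
             * AngleAxisd(0.3, Vector3d::UnitZ())
             * Scaling(1.0, 2.0, 3.0);
  Matrix3d rot, scl;
  t.computeRotationScaling(&rot, &scl);            // t.linear() == rot * scl, with rot orthonormal
  return t.linear().isApprox(rot * scl) ? 0 : 1;
}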
template<typename PositionDerived, typename OrientationType, typename ScaleDerived>
EIGEN_DEVICE_FUNC
Transform& fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,
const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale);
EIGEN_DEVICE_FUNC
inline Transform inverse(TransformTraits traits = (TransformTraits)Mode) const;
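inverse() accepts an optional TransformTraits hint; when the caller knows the stored transform is actually an isometry, passing Eigen::Isometry lets the linear part be inverted by a transpose instead of a general matrix inverse. A sketch (illustrative only):

#include <Eigen/Geometry>

int main() {
  using namespace Eigen;
  Affine3d t = Translation3d(1.0, 2.0, 3.0) * AngleAxisd(0.3, Vector3d::UnitX());
  Affine3d ti = t.inverse(Isometry);               // cheap inverse: R^T and -R^T * translation
  return (ti * t).isApprox(Affine3d::Identity()) ? 0 : 1;
}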
/** \returns a const pointer to the column major internal matrix */
const Scalar* data() const { return m_matrix.data(); }
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_matrix.data(); }
/** \returns a non-const pointer to the column major internal matrix */
Scalar* data() { return m_matrix.data(); }
EIGEN_DEVICE_FUNC Scalar* data() { return m_matrix.data(); }
/** \returns \c *this with scalar type casted to \a NewScalarType
*
@@ -610,12 +629,12 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
inline typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type cast() const
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type cast() const
{ return typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type(*this); }
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
inline explicit Transform(const Transform<OtherScalarType,Dim,Mode,Options>& other)
EIGEN_DEVICE_FUNC inline explicit Transform(const Transform<OtherScalarType,Dim,Mode,Options>& other)
{
check_template_params();
m_matrix = other.matrix().template cast<Scalar>();
@@ -625,12 +644,12 @@ public:
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
bool isApprox(const Transform& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
EIGEN_DEVICE_FUNC bool isApprox(const Transform& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_matrix.isApprox(other.m_matrix, prec); }
/** Sets the last row to [0 ... 0 1]
*/
void makeAffine()
EIGEN_DEVICE_FUNC void makeAffine()
{
internal::transform_make_affine<int(Mode)>::run(m_matrix);
}
@@ -639,26 +658,26 @@ public:
* \returns the Dim x Dim linear part if the transformation is affine,
* and the HDim x Dim part for projective transformations.
*/
inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt()
EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt()
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }
/** \internal
* \returns the Dim x Dim linear part if the transformation is affine,
* and the HDim x Dim part for projective transformations.
*/
inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt() const
EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt() const
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }
/** \internal
* \returns the translation part if the transformation is affine,
* and the last column for projective transformations.
*/
inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt()
EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt()
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }
/** \internal
* \returns the translation part if the transformation is affine,
* and the last column for projective transformations.
*/
inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt() const
EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt() const
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }
@@ -668,7 +687,7 @@ public:
protected:
#ifndef EIGEN_PARSED_BY_DOXYGEN
static EIGEN_STRONG_INLINE void check_template_params()
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void check_template_params()
{
EIGEN_STATIC_ASSERT((Options & (DontAlign|RowMajor)) == Options, INVALID_MATRIX_TEMPLATE_PARAMETERS)
}
@@ -821,7 +840,7 @@ QTransform Transform<Scalar,Dim,Mode,Options>::toQTransform(void) const
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename OtherDerived>
Transform<Scalar,Dim,Mode,Options>&
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::scale(const MatrixBase<OtherDerived> &other)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -835,7 +854,7 @@ Transform<Scalar,Dim,Mode,Options>::scale(const MatrixBase<OtherDerived> &other)
* \sa prescale(Scalar)
*/
template<typename Scalar, int Dim, int Mode, int Options>
inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::scale(const Scalar& s)
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::scale(const Scalar& s)
{
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
linearExt() *= s;
@@ -848,12 +867,12 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::s
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename OtherDerived>
Transform<Scalar,Dim,Mode,Options>&
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &other)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
m_matrix.template block<Dim,HDim>(0,0).noalias() = (other.asDiagonal() * m_matrix.template block<Dim,HDim>(0,0));
affine().noalias() = (other.asDiagonal() * affine());
return *this;
}
@@ -862,7 +881,7 @@ Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &oth
* \sa scale(Scalar)
*/
template<typename Scalar, int Dim, int Mode, int Options>
inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::prescale(const Scalar& s)
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::prescale(const Scalar& s)
{
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
m_matrix.template topRows<Dim>() *= s;
@@ -875,7 +894,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::p
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename OtherDerived>
Transform<Scalar,Dim,Mode,Options>&
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::translate(const MatrixBase<OtherDerived> &other)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -889,7 +908,7 @@ Transform<Scalar,Dim,Mode,Options>::translate(const MatrixBase<OtherDerived> &ot
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename OtherDerived>
Transform<Scalar,Dim,Mode,Options>&
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived> &other)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -919,7 +938,7 @@ Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived>
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename RotationType>
Transform<Scalar,Dim,Mode,Options>&
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::rotate(const RotationType& rotation)
{
linearExt() *= internal::toRotationMatrix<Scalar,Dim>(rotation);
@@ -935,7 +954,7 @@ Transform<Scalar,Dim,Mode,Options>::rotate(const RotationType& rotation)
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename RotationType>
Transform<Scalar,Dim,Mode,Options>&
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::prerotate(const RotationType& rotation)
{
m_matrix.template block<Dim,HDim>(0,0) = internal::toRotationMatrix<Scalar,Dim>(rotation)
@@ -949,7 +968,7 @@ Transform<Scalar,Dim,Mode,Options>::prerotate(const RotationType& rotation)
* \sa preshear()
*/
template<typename Scalar, int Dim, int Mode, int Options>
Transform<Scalar,Dim,Mode,Options>&
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::shear(const Scalar& sx, const Scalar& sy)
{
EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
@@ -965,7 +984,7 @@ Transform<Scalar,Dim,Mode,Options>::shear(const Scalar& sx, const Scalar& sy)
* \sa shear()
*/
template<typename Scalar, int Dim, int Mode, int Options>
Transform<Scalar,Dim,Mode,Options>&
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::preshear(const Scalar& sx, const Scalar& sy)
{
EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
@@ -979,7 +998,7 @@ Transform<Scalar,Dim,Mode,Options>::preshear(const Scalar& sx, const Scalar& sy)
******************************************************/
template<typename Scalar, int Dim, int Mode, int Options>
inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const TranslationType& t)
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const TranslationType& t)
{
linear().setIdentity();
translation() = t.vector();
@@ -988,7 +1007,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
}
template<typename Scalar, int Dim, int Mode, int Options>
inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const TranslationType& t) const
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const TranslationType& t) const
{
Transform res = *this;
res.translate(t.vector());
@@ -996,7 +1015,7 @@ inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::op
}
template<typename Scalar, int Dim, int Mode, int Options>
inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const UniformScaling<Scalar>& s)
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const UniformScaling<Scalar>& s)
{
m_matrix.setZero();
linear().diagonal().fill(s.factor());
@@ -1006,7 +1025,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
template<typename Scalar, int Dim, int Mode, int Options>
template<typename Derived>
inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const RotationBase<Derived,Dim>& r)
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const RotationBase<Derived,Dim>& r)
{
linear() = internal::toRotationMatrix<Scalar,Dim>(r);
translation().setZero();
@@ -1016,7 +1035,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
template<typename Scalar, int Dim, int Mode, int Options>
template<typename Derived>
inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const RotationBase<Derived,Dim>& r) const
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const RotationBase<Derived,Dim>& r) const
{
Transform res = *this;
res.rotate(r.derived());
@@ -1035,7 +1054,7 @@ inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::op
* \sa computeRotationScaling(), computeScalingRotation(), class SVD
*/
template<typename Scalar, int Dim, int Mode, int Options>
const typename Transform<Scalar,Dim,Mode,Options>::LinearMatrixType
EIGEN_DEVICE_FUNC const typename Transform<Scalar,Dim,Mode,Options>::LinearMatrixType
Transform<Scalar,Dim,Mode,Options>::rotation() const
{
LinearMatrixType result;
@@ -1057,7 +1076,7 @@ Transform<Scalar,Dim,Mode,Options>::rotation() const
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename RotationMatrixType, typename ScalingMatrixType>
void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
EIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
{
JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
@@ -1086,7 +1105,7 @@ void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixTy
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename ScalingMatrixType, typename RotationMatrixType>
void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
EIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
{
JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
@@ -1107,7 +1126,7 @@ void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixTyp
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename PositionDerived, typename OrientationType, typename ScaleDerived>
Transform<Scalar,Dim,Mode,Options>&
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,
const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale)
{
@@ -1124,7 +1143,7 @@ template<int Mode>
struct transform_make_affine
{
template<typename MatrixType>
static void run(MatrixType &mat)
EIGEN_DEVICE_FUNC static void run(MatrixType &mat)
{
static const int Dim = MatrixType::ColsAtCompileTime-1;
mat.template block<1,Dim>(Dim,0).setZero();
@@ -1135,21 +1154,21 @@ struct transform_make_affine
template<>
struct transform_make_affine<AffineCompact>
{
template<typename MatrixType> static void run(MatrixType &) { }
template<typename MatrixType> EIGEN_DEVICE_FUNC static void run(MatrixType &) { }
};
// selector needed to avoid taking the inverse of a 3x4 matrix
template<typename TransformType, int Mode=TransformType::Mode>
struct projective_transform_inverse
{
static inline void run(const TransformType&, TransformType&)
EIGEN_DEVICE_FUNC static inline void run(const TransformType&, TransformType&)
{}
};
template<typename TransformType>
struct projective_transform_inverse<TransformType, Projective>
{
static inline void run(const TransformType& m, TransformType& res)
EIGEN_DEVICE_FUNC static inline void run(const TransformType& m, TransformType& res)
{
res.matrix() = m.matrix().inverse();
}
@@ -1179,7 +1198,7 @@ struct projective_transform_inverse<TransformType, Projective>
* \sa MatrixBase::inverse()
*/
template<typename Scalar, int Dim, int Mode, int Options>
Transform<Scalar,Dim,Mode,Options>
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>
Transform<Scalar,Dim,Mode,Options>::inverse(TransformTraits hint) const
{
Transform res;
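Note: the hunks above annotate Transform's assignment from a RotationBase, its rotation/scaling decomposition, and its inverse with EIGEN_DEVICE_FUNC so they can also be compiled as device code. A minimal host-side sketch of the same entry points, assuming Eigen 3.3 and <Eigen/Geometry> (the numeric values are arbitrary):

#include <Eigen/Geometry>
#include <iostream>

int main() {
  Eigen::Affine3d T = Eigen::Affine3d::Identity();
  T = Eigen::AngleAxisd(0.5, Eigen::Vector3d::UnitZ()); // operator= from a RotationBase: sets linear(), zeroes the translation
  T.pretranslate(Eigen::Vector3d(1.0, 2.0, 3.0));

  Eigen::Matrix3d R, S;
  T.computeRotationScaling(&R, &S);                     // SVD-based split into rotation * scaling
  Eigen::Affine3d Tinv = T.inverse(Eigen::Isometry);    // hint: rotation + translation only

  std::cout << (Tinv * T).matrix() << std::endl;        // approximately the identity
  return 0;
}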


@@ -51,16 +51,16 @@ protected:
public:
/** Default constructor without initialization. */
Translation() {}
EIGEN_DEVICE_FUNC Translation() {}
/** */
inline Translation(const Scalar& sx, const Scalar& sy)
EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy)
{
eigen_assert(Dim==2);
m_coeffs.x() = sx;
m_coeffs.y() = sy;
}
/** */
inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz)
EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz)
{
eigen_assert(Dim==3);
m_coeffs.x() = sx;
@@ -68,48 +68,48 @@ public:
m_coeffs.z() = sz;
}
/** Constructs and initializes the translation transformation from a vector of translation coefficients */
explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
EIGEN_DEVICE_FUNC explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
/** \brief Returns the x-translation by value. **/
inline Scalar x() const { return m_coeffs.x(); }
EIGEN_DEVICE_FUNC inline Scalar x() const { return m_coeffs.x(); }
/** \brief Returns the y-translation by value. **/
inline Scalar y() const { return m_coeffs.y(); }
EIGEN_DEVICE_FUNC inline Scalar y() const { return m_coeffs.y(); }
/** \brief Returns the z-translation by value. **/
inline Scalar z() const { return m_coeffs.z(); }
EIGEN_DEVICE_FUNC inline Scalar z() const { return m_coeffs.z(); }
/** \brief Returns the x-translation as a reference. **/
inline Scalar& x() { return m_coeffs.x(); }
EIGEN_DEVICE_FUNC inline Scalar& x() { return m_coeffs.x(); }
/** \brief Returns the y-translation as a reference. **/
inline Scalar& y() { return m_coeffs.y(); }
EIGEN_DEVICE_FUNC inline Scalar& y() { return m_coeffs.y(); }
/** \brief Returns the z-translation as a reference. **/
inline Scalar& z() { return m_coeffs.z(); }
EIGEN_DEVICE_FUNC inline Scalar& z() { return m_coeffs.z(); }
const VectorType& vector() const { return m_coeffs; }
VectorType& vector() { return m_coeffs; }
EIGEN_DEVICE_FUNC const VectorType& vector() const { return m_coeffs; }
EIGEN_DEVICE_FUNC VectorType& vector() { return m_coeffs; }
const VectorType& translation() const { return m_coeffs; }
VectorType& translation() { return m_coeffs; }
EIGEN_DEVICE_FUNC const VectorType& translation() const { return m_coeffs; }
EIGEN_DEVICE_FUNC VectorType& translation() { return m_coeffs; }
/** Concatenates two translations */
inline Translation operator* (const Translation& other) const
EIGEN_DEVICE_FUNC inline Translation operator* (const Translation& other) const
{ return Translation(m_coeffs + other.m_coeffs); }
/** Concatenates a translation and a uniform scaling */
inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;
EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;
/** Concatenates a translation and a linear transformation */
template<typename OtherDerived>
inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;
EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;
/** Concatenates a translation and a rotation */
template<typename Derived>
inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const
EIGEN_DEVICE_FUNC inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const
{ return *this * IsometryTransformType(r); }
/** \returns the concatenation of a linear transformation \a l with the translation \a t */
// it's a nightmare to define a templated friend function outside its declaration
template<typename OtherDerived> friend
inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)
EIGEN_DEVICE_FUNC inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)
{
AffineTransformType res;
res.matrix().setZero();
@@ -122,7 +122,7 @@ public:
/** Concatenates a translation and a transformation */
template<int Mode, int Options>
inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode,Options>& t) const
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode,Options>& t) const
{
Transform<Scalar,Dim,Mode> res = t;
res.pretranslate(m_coeffs);
@@ -130,8 +130,10 @@ public:
}
/** Applies translation to vector */
inline VectorType operator* (const VectorType& other) const
{ return m_coeffs + other; }
template<typename Derived>
inline typename internal::enable_if<Derived::IsVectorAtCompileTime,VectorType>::type
operator* (const MatrixBase<Derived>& vec) const
{ return m_coeffs + vec.derived(); }
/** \returns the inverse translation (opposite) */
Translation inverse() const { return Translation(-m_coeffs); }
@@ -150,19 +152,19 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
inline typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const
{ return typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type(*this); }
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
inline explicit Translation(const Translation<OtherScalarType,Dim>& other)
EIGEN_DEVICE_FUNC inline explicit Translation(const Translation<OtherScalarType,Dim>& other)
{ m_coeffs = other.vector().template cast<Scalar>(); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
EIGEN_DEVICE_FUNC bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_coeffs.isApprox(other.m_coeffs, prec); }
};
@@ -176,7 +178,7 @@ typedef Translation<double,3> Translation3d;
//@}
template<typename Scalar, int Dim>
inline typename Translation<Scalar,Dim>::AffineTransformType
EIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType
Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
{
AffineTransformType res;
@@ -189,7 +191,7 @@ Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
template<typename Scalar, int Dim>
template<typename OtherDerived>
inline typename Translation<Scalar,Dim>::AffineTransformType
EIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType
Translation<Scalar,Dim>::operator* (const EigenBase<OtherDerived>& linear) const
{
AffineTransformType res;
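Note: the Translation changes above add EIGEN_DEVICE_FUNC throughout and generalize operator* so it accepts any vector expression, not just a plain VectorType. A small usage sketch, assuming Eigen 3.3 and <Eigen/Geometry>:

#include <Eigen/Geometry>
#include <iostream>

int main() {
  Eigen::Translation3d t(1.0, 2.0, 3.0);
  Eigen::Vector3d v(0.5, 0.5, 0.5);

  Eigen::Vector3d w = t * v;                    // translate a plain vector
  Eigen::Vector3d u = t * (2.0 * v);            // the templated operator* also takes vector expressions
  Eigen::Affine3d A = t * Eigen::Scaling(2.0);  // translation concatenated with a uniform scaling

  std::cout << w.transpose() << "\n" << u.transpose() << "\n" << A.matrix() << std::endl;
  return 0;
}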


@@ -330,25 +330,27 @@ public:
}
/** \internal */
template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
void _solve_impl(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
template<typename Rhs, typename DestDerived>
void _solve_impl(const Rhs& b, SparseMatrixBase<DestDerived> &aDest) const
{
eigen_assert(rows()==b.rows());
Index rhsCols = b.cols();
Index size = b.rows();
DestDerived& dest(aDest.derived());
typedef typename DestDerived::Scalar DestScalar;
Eigen::Matrix<DestScalar,Dynamic,1> tb(size);
Eigen::Matrix<DestScalar,Dynamic,1> tx(cols());
// We do not directly fill dest because sparse expressions have to be free of aliasing issues.
// For non-square least-squares problems, b and dest might not have the same size, yet they might alias each other.
SparseMatrix<DestScalar,DestOptions,DestIndex> tmp(cols(),rhsCols);
typename DestDerived::PlainObject tmp(cols(),rhsCols);
for(Index k=0; k<rhsCols; ++k)
{
tb = b.col(k);
tx = derived().solve(tb);
tmp.col(k) = tx.sparseView(0);
}
tmp.swap(dest);
dest.swap(tmp);
}
protected:
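Note: the _solve_impl change above lets a decomposition write its result into any sparse destination, one column at a time through dense temporaries. A hedged usage sketch with SparseLU and a sparse right-hand side (assumes Eigen 3.3; the tiny diagonal system is only for illustration):

#include <Eigen/SparseCore>
#include <Eigen/SparseLU>

int main() {
  typedef Eigen::SparseMatrix<double> SpMat;
  SpMat A(3, 3), B(3, 2);
  A.insert(0, 0) = 4.0;  A.insert(1, 1) = 2.0;  A.insert(2, 2) = 1.0;
  B.insert(0, 0) = 1.0;  B.insert(2, 1) = 3.0;
  A.makeCompressed();
  B.makeCompressed();

  Eigen::SparseLU<SpMat> lu(A);   // factorize the (diagonal, hence invertible) matrix
  SpMat X = lu.solve(B);          // each column of B is solved densely, then pruned back to sparse
  return X.nonZeros() > 0 ? 0 : 1;
}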


@@ -98,7 +98,11 @@ struct Assignment<DstXprType, SolveWithGuess<DecType,RhsType,GuessType>, interna
typedef SolveWithGuess<DecType,RhsType,GuessType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
// FIXME shall we resize dst here?
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
dst = src.guess();
src.dec()._solve_with_guess_impl(src.rhs(), dst/*, src.guess()*/);
}
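Note: the assignment above now resizes the destination before copying the guess and running the solver, so an uninitialized destination is handled gracefully. A minimal sketch with ConjugateGradient, assuming Eigen 3.3 (the identity system is only there to make the example trivially convergent):

#include <Eigen/SparseCore>
#include <Eigen/IterativeLinearSolvers>

int main() {
  typedef Eigen::SparseMatrix<double> SpMat;
  SpMat A(100, 100);
  A.setIdentity();                              // trivially SPD

  Eigen::ConjugateGradient<SpMat> cg(A);
  Eigen::VectorXd b  = Eigen::VectorXd::Ones(100);
  Eigen::VectorXd x0 = Eigen::VectorXd::Zero(100);

  Eigen::VectorXd x;                            // deliberately left 0-sized
  x = cg.solveWithGuess(b, x0);                 // the assignment resizes x before writing into it
  return cg.info() == Eigen::Success ? 0 : 1;
}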


@@ -292,7 +292,11 @@ struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename Dst
typedef Inverse<XprType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &)
{
// FIXME shall we resize dst here?
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);
EIGEN_ONLY_USED_FOR_DEBUG(Size);
eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst)))


@@ -506,7 +506,7 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k);
}
RealScalar threshold_helper = numext::abs2(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows);
RealScalar threshold_helper = numext::abs2<Scalar>(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows);
RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon());
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
@@ -557,8 +557,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j);
temp = (RealScalar(1) + temp) * (RealScalar(1) - temp);
temp = temp < 0 ? 0 : temp;
RealScalar temp2 = temp * numext::abs2(m_colNormsUpdated.coeffRef(j) /
m_colNormsDirect.coeffRef(j));
RealScalar temp2 = temp * numext::abs2<Scalar>(m_colNormsUpdated.coeffRef(j) /
m_colNormsDirect.coeffRef(j));
if (temp2 <= norm_downdate_threshold) {
// The updated norm has become too inaccurate so re-compute the column
// norm directly.
@@ -613,12 +613,12 @@ void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &
namespace internal {
template<typename DstXprType, typename MatrixType, typename Scalar>
struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar,Scalar>, Dense2Dense>
template<typename DstXprType, typename MatrixType>
struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename ColPivHouseholderQR<MatrixType>::Scalar>, Dense2Dense>
{
typedef ColPivHouseholderQR<MatrixType> QrType;
typedef Inverse<QrType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename QrType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
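Note: the QR changes above pass the scalar type explicitly to numext::abs2 in the pivot-threshold logic and rewrite the Inverse assignment so the functor's scalar types are deduced from the destination and the decomposition. A small usage sketch touching both code paths, assuming Eigen 3.3:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Matrix3d A;
  A << 4, 1, 0,
       1, 3, 1,
       0, 1, 2;
  Eigen::ColPivHouseholderQR<Eigen::Matrix3d> qr(A);
  std::cout << "rank = " << qr.rank() << std::endl;  // rank detection relies on the abs2-based threshold

  Eigen::Matrix3d Ainv = qr.inverse();                // evaluated through the Inverse<ColPivHouseholderQR> assignment
  std::cout << (Ainv * A).isIdentity(1e-12) << std::endl;
  return 0;
}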


@@ -575,12 +575,12 @@ void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType
namespace internal {
template<typename DstXprType, typename MatrixType, typename Scalar>
struct Assignment<DstXprType, Inverse<FullPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar,Scalar>, Dense2Dense>
template<typename DstXprType, typename MatrixType>
struct Assignment<DstXprType, Inverse<FullPivHouseholderQR<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivHouseholderQR<MatrixType>::Scalar>, Dense2Dense>
{
typedef FullPivHouseholderQR<MatrixType> QrType;
typedef Inverse<QrType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename QrType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}


@@ -119,16 +119,16 @@ class SPQR : public SparseSolverBase<SPQR<_MatrixType> >
max2Norm = RealScalar(1);
pivotThreshold = 20 * (mat.rows() + mat.cols()) * max2Norm * NumTraits<RealScalar>::epsilon();
}
cholmod_sparse A;
A = viewAsCholmod(mat);
m_rows = matrix.rows();
Index col = matrix.cols();
m_rank = SuiteSparseQR<Scalar>(m_ordering, pivotThreshold, col, &A,
&m_cR, &m_E, &m_H, &m_HPinv, &m_HTau, &m_cc);
if (!m_cR)
{
m_info = NumericalIssue;
m_info = NumericalIssue;
m_isInitialized = false;
return;
}
@@ -139,7 +139,7 @@ class SPQR : public SparseSolverBase<SPQR<_MatrixType> >
/**
* Get the number of rows of the input matrix and the Q matrix
*/
inline Index rows() const {return m_cR->nrow; }
inline Index rows() const {return m_rows; }
/**
* Get the number of columns of the input matrix.
@@ -245,6 +245,7 @@ class SPQR : public SparseSolverBase<SPQR<_MatrixType> >
mutable Index m_rank; // The rank of the matrix
mutable cholmod_common m_cc; // Workspace and parameters
bool m_useDefaultThreshold; // Use default threshold
Index m_rows;
template<typename ,typename > friend struct SPQR_QProduct;
};


@@ -412,7 +412,7 @@ struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
}
// update largest diagonal entry
maxDiagEntry = numext::maxi(maxDiagEntry,numext::maxi(abs(work_matrix.coeff(p,p)), abs(work_matrix.coeff(q,q))));
maxDiagEntry = numext::maxi<RealScalar>(maxDiagEntry,numext::maxi<RealScalar>(abs(work_matrix.coeff(p,p)), abs(work_matrix.coeff(q,q))));
// and check whether the 2x2 block is already diagonal
RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
return abs(work_matrix.coeff(p,q))>threshold || abs(work_matrix.coeff(q,p)) > threshold;
@@ -725,7 +725,7 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
if(computeV()) m_matrixV.applyOnTheRight(p,q,j_right);
// keep track of the largest diagonal coefficient
maxDiagEntry = numext::maxi<RealScalar>(maxDiagEntry,numext::maxi(abs(m_workMatrix.coeff(p,p)), abs(m_workMatrix.coeff(q,q))));
maxDiagEntry = numext::maxi<RealScalar>(maxDiagEntry,numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p,p)), abs(m_workMatrix.coeff(q,q))));
}
}
}


@@ -139,13 +139,16 @@ struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Dense>
{
static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
if(internal::is_same<Functor,internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> >::value)
dst.setZero();
internal::evaluator<SrcXprType> srcEval(src);
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
internal::evaluator<DstXprType> dstEval(dst);
const Index outerEvaluationSize = (internal::evaluator<SrcXprType>::Flags&RowMajorBit) ? src.rows() : src.cols();
for (Index j=0; j<outerEvaluationSize; ++j)
for (typename internal::evaluator<SrcXprType>::InnerIterator i(srcEval,j); i; ++i)
@@ -161,6 +164,11 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
typedef Solve<DecType,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
src.dec()._solve_impl(src.rhs(), dst);
}
};
@@ -179,6 +187,11 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Sparse>
template<int Options>
static void run(SparseMatrix<Scalar,Options,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
Index size = src.diagonal().size();
dst.makeCompressed();
dst.resizeNonZeros(size);


@@ -130,7 +130,7 @@ public:
// 2 - let's check whether there is enough allocated memory
Index nnz = tmp.nonZeros();
Index start = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
Index start = m_outerStart==0 ? 0 : m_matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
Index end = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]; // ending position of the current block
Index block_size = end - start; // available room in the current block
Index tail_size = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end;
@@ -139,6 +139,8 @@ public:
? Index(matrix.data().allocatedSize()) + block_size
: block_size;
Index tmp_start = tmp.outerIndexPtr()[0];
bool update_trailing_pointers = false;
if(nnz>free_size)
{
@@ -148,8 +150,8 @@ public:
internal::smart_copy(m_matrix.valuePtr(), m_matrix.valuePtr() + start, newdata.valuePtr());
internal::smart_copy(m_matrix.innerIndexPtr(), m_matrix.innerIndexPtr() + start, newdata.indexPtr());
internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, newdata.valuePtr() + start);
internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, newdata.indexPtr() + start);
internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, newdata.valuePtr() + start);
internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, newdata.indexPtr() + start);
internal::smart_copy(matrix.valuePtr()+end, matrix.valuePtr()+end + tail_size, newdata.valuePtr()+start+nnz);
internal::smart_copy(matrix.innerIndexPtr()+end, matrix.innerIndexPtr()+end + tail_size, newdata.indexPtr()+start+nnz);
@@ -173,8 +175,8 @@ public:
update_trailing_pointers = true;
}
internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, matrix.valuePtr() + start);
internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, matrix.innerIndexPtr() + start);
internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, matrix.valuePtr() + start);
internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, matrix.innerIndexPtr() + start);
}
// update outer index pointers and innerNonZeros
@@ -430,7 +432,6 @@ public:
protected:
// friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
friend class ReverseInnerIterator;
friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
Index nonZeros() const { return Dynamic; }
@@ -466,8 +467,6 @@ struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBa
typedef typename XprType::StorageIndex StorageIndex;
typedef typename XprType::Scalar Scalar;
class ReverseInnerIterator;
enum {
IsRowMajor = XprType::IsRowMajor,


@@ -224,11 +224,11 @@ class SparseCompressedBase<Derived>::ReverseInnerIterator
}
else
{
m_start.value() = mat.outerIndexPtr()[outer];
m_start = mat.outerIndexPtr()[outer];
if(mat.isCompressed())
m_id = mat.outerIndexPtr()[outer+1];
else
m_id = m_start.value() + mat.innerNonZeroPtr()[outer];
m_id = m_start + mat.innerNonZeroPtr()[outer];
}
}
@@ -254,14 +254,15 @@ class SparseCompressedBase<Derived>::ReverseInnerIterator
inline Index row() const { return IsRowMajor ? m_outer.value() : index(); }
inline Index col() const { return IsRowMajor ? index() : m_outer.value(); }
inline operator bool() const { return (m_id > m_start.value()); }
inline operator bool() const { return (m_id > m_start); }
protected:
const Scalar* m_values;
const StorageIndex* m_indices;
const internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> m_outer;
typedef internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> OuterType;
const OuterType m_outer;
Index m_start;
Index m_id;
const internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> m_start;
};
namespace internal {
@@ -272,7 +273,6 @@ struct evaluator<SparseCompressedBase<Derived> >
{
typedef typename Derived::Scalar Scalar;
typedef typename Derived::InnerIterator InnerIterator;
typedef typename Derived::ReverseInnerIterator ReverseInnerIterator;
enum {
CoeffReadCost = NumTraits<Scalar>::ReadCost,


@@ -68,7 +68,6 @@ protected:
typedef typename XprType::StorageIndex StorageIndex;
public:
class ReverseInnerIterator;
class InnerIterator
{
public:
@@ -161,7 +160,6 @@ protected:
typedef typename XprType::StorageIndex StorageIndex;
public:
class ReverseInnerIterator;
class InnerIterator
{
enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
@@ -249,7 +247,6 @@ protected:
typedef typename XprType::StorageIndex StorageIndex;
public:
class ReverseInnerIterator;
class InnerIterator
{
enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
@@ -325,26 +322,88 @@ protected:
const XprType &m_expr;
};
template<typename T,
typename LhsKind = typename evaluator_traits<typename T::Lhs>::Kind,
typename RhsKind = typename evaluator_traits<typename T::Rhs>::Kind,
typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct sparse_conjunction_evaluator;
// "sparse .* sparse"
template<typename T1, typename T2, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IteratorBased, IteratorBased>
: evaluator_base<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
{
typedef CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> XprType;
typedef sparse_conjunction_evaluator<XprType> Base;
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
};
// "dense .* sparse"
template<typename T1, typename T2, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IndexBased, IteratorBased>
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
{
typedef CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> XprType;
typedef sparse_conjunction_evaluator<XprType> Base;
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
};
// "sparse .* dense"
template<typename T1, typename T2, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IteratorBased, IndexBased>
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
{
typedef CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> XprType;
typedef sparse_conjunction_evaluator<XprType> Base;
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
};
// "sparse && sparse"
template<typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs>, IteratorBased, IteratorBased>
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> >
{
typedef CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> XprType;
typedef sparse_conjunction_evaluator<XprType> Base;
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
};
// "dense && sparse"
template<typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs>, IndexBased, IteratorBased>
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> >
{
typedef CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> XprType;
typedef sparse_conjunction_evaluator<XprType> Base;
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
};
// "sparse && dense"
template<typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs>, IteratorBased, IndexBased>
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> >
{
typedef CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> XprType;
typedef sparse_conjunction_evaluator<XprType> Base;
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
};
// "sparse ^ sparse"
template<typename XprType>
struct sparse_conjunction_evaluator<XprType, IteratorBased, IteratorBased>
: evaluator_base<XprType>
{
protected:
typedef scalar_product_op<T1,T2> BinaryOp;
typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
typedef typename XprType::Functor BinaryOp;
typedef typename XprType::Lhs LhsArg;
typedef typename XprType::Rhs RhsArg;
typedef typename evaluator<LhsArg>::InnerIterator LhsIterator;
typedef typename evaluator<RhsArg>::InnerIterator RhsIterator;
typedef typename XprType::StorageIndex StorageIndex;
typedef typename traits<XprType>::Scalar Scalar;
public:
class ReverseInnerIterator;
class InnerIterator
{
public:
EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
EIGEN_STRONG_INLINE InnerIterator(const sparse_conjunction_evaluator& aEval, Index outer)
: m_lhsIter(aEval.m_lhsImpl,outer), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor)
{
while (m_lhsIter && m_rhsIter && (m_lhsIter.index() != m_rhsIter.index()))
@@ -386,11 +445,11 @@ public:
enum {
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
Flags = XprType::Flags
};
explicit binary_evaluator(const XprType& xpr)
explicit sparse_conjunction_evaluator(const XprType& xpr)
: m_functor(xpr.functor()),
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
@@ -405,32 +464,32 @@ public:
protected:
const BinaryOp m_functor;
evaluator<Lhs> m_lhsImpl;
evaluator<Rhs> m_rhsImpl;
evaluator<LhsArg> m_lhsImpl;
evaluator<RhsArg> m_rhsImpl;
};
// "dense .* sparse"
template<typename T1, typename T2, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IndexBased, IteratorBased>
: evaluator_base<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
// "dense ^ sparse"
template<typename XprType>
struct sparse_conjunction_evaluator<XprType, IndexBased, IteratorBased>
: evaluator_base<XprType>
{
protected:
typedef scalar_product_op<T1,T2> BinaryOp;
typedef evaluator<Lhs> LhsEvaluator;
typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
typedef typename XprType::Functor BinaryOp;
typedef typename XprType::Lhs LhsArg;
typedef typename XprType::Rhs RhsArg;
typedef evaluator<LhsArg> LhsEvaluator;
typedef typename evaluator<RhsArg>::InnerIterator RhsIterator;
typedef typename XprType::StorageIndex StorageIndex;
typedef typename traits<XprType>::Scalar Scalar;
public:
class ReverseInnerIterator;
class InnerIterator
{
enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
enum { IsRowMajor = (int(RhsArg::Flags)&RowMajorBit)==RowMajorBit };
public:
EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
EIGEN_STRONG_INLINE InnerIterator(const sparse_conjunction_evaluator& aEval, Index outer)
: m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_outer(outer)
{}
@@ -458,12 +517,12 @@ public:
enum {
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
// Expose storage order of the sparse expression
Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit)
Flags = (XprType::Flags & ~RowMajorBit) | (int(RhsArg::Flags)&RowMajorBit)
};
explicit binary_evaluator(const XprType& xpr)
explicit sparse_conjunction_evaluator(const XprType& xpr)
: m_functor(xpr.functor()),
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
@@ -478,32 +537,32 @@ public:
protected:
const BinaryOp m_functor;
evaluator<Lhs> m_lhsImpl;
evaluator<Rhs> m_rhsImpl;
evaluator<LhsArg> m_lhsImpl;
evaluator<RhsArg> m_rhsImpl;
};
// "sparse .* dense"
template<typename T1, typename T2, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IteratorBased, IndexBased>
: evaluator_base<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
// "sparse ^ dense"
template<typename XprType>
struct sparse_conjunction_evaluator<XprType, IteratorBased, IndexBased>
: evaluator_base<XprType>
{
protected:
typedef scalar_product_op<T1,T2> BinaryOp;
typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
typedef evaluator<Rhs> RhsEvaluator;
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
typedef typename XprType::Functor BinaryOp;
typedef typename XprType::Lhs LhsArg;
typedef typename XprType::Rhs RhsArg;
typedef typename evaluator<LhsArg>::InnerIterator LhsIterator;
typedef evaluator<RhsArg> RhsEvaluator;
typedef typename XprType::StorageIndex StorageIndex;
typedef typename traits<XprType>::Scalar Scalar;
public:
class ReverseInnerIterator;
class InnerIterator
{
enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
enum { IsRowMajor = (int(LhsArg::Flags)&RowMajorBit)==RowMajorBit };
public:
EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
EIGEN_STRONG_INLINE InnerIterator(const sparse_conjunction_evaluator& aEval, Index outer)
: m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_outer(outer)
{}
@@ -525,19 +584,19 @@ public:
protected:
LhsIterator m_lhsIter;
const evaluator<Rhs> &m_rhsEval;
const evaluator<RhsArg> &m_rhsEval;
const BinaryOp& m_functor;
const Index m_outer;
};
enum {
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
// Expose storage order of the sparse expression
Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit)
Flags = (XprType::Flags & ~RowMajorBit) | (int(LhsArg::Flags)&RowMajorBit)
};
explicit binary_evaluator(const XprType& xpr)
explicit sparse_conjunction_evaluator(const XprType& xpr)
: m_functor(xpr.functor()),
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
@@ -552,8 +611,8 @@ public:
protected:
const BinaryOp m_functor;
evaluator<Lhs> m_lhsImpl;
evaluator<Rhs> m_rhsImpl;
evaluator<LhsArg> m_lhsImpl;
evaluator<RhsArg> m_rhsImpl;
};
}
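Note: the refactoring above funnels both the coefficient-wise product and the boolean && of sparse operands through a shared sparse_conjunction_evaluator, covering the sparse/sparse, dense/sparse and sparse/dense cases. A hedged sketch of the "sparse .* dense" case, assuming Eigen 3.3:

#include <Eigen/SparseCore>
#include <Eigen/Dense>

int main() {
  Eigen::SparseMatrix<double> S(3, 3);
  S.insert(0, 0) = 1.0;
  S.insert(2, 1) = 4.0;
  S.makeCompressed();
  Eigen::MatrixXd D = Eigen::MatrixXd::Constant(3, 3, 2.0);

  // Only the stored entries of S are visited, and the result stays sparse.
  Eigen::SparseMatrix<double> P = S.cwiseProduct(D);
  return P.nonZeros() == S.nonZeros() ? 0 : 1;
}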


@@ -22,7 +22,6 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
class InnerIterator;
class ReverseInnerIterator;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
@@ -41,7 +40,6 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>
protected:
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
// typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
const UnaryOp m_functor;
evaluator<ArgType> m_argImpl;
@@ -70,33 +68,6 @@ class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::InnerIterat
Scalar& valueRef();
};
// template<typename UnaryOp, typename ArgType>
// class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::ReverseInnerIterator
// : public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalReverseIterator
// {
// typedef typename XprType::Scalar Scalar;
// typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalReverseIterator Base;
// public:
//
// EIGEN_STRONG_INLINE ReverseInnerIterator(const XprType& unaryOp, typename XprType::Index outer)
// : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
// {}
//
// EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
// { Base::operator--(); return *this; }
//
// EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
//
// protected:
// const UnaryOp m_functor;
// private:
// Scalar& valueRef();
// };
template<typename ViewOp, typename ArgType>
struct unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>
: public evaluator_base<CwiseUnaryView<ViewOp,ArgType> >
@@ -105,7 +76,6 @@ struct unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>
typedef CwiseUnaryView<ViewOp, ArgType> XprType;
class InnerIterator;
class ReverseInnerIterator;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<ViewOp>::Cost,
@@ -120,7 +90,6 @@ struct unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>
protected:
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
// typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
const ViewOp m_functor;
evaluator<ArgType> m_argImpl;
@@ -148,29 +117,6 @@ class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::InnerItera
const ViewOp m_functor;
};
// template<typename ViewOp, typename ArgType>
// class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::ReverseInnerIterator
// : public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalReverseIterator
// {
// typedef typename XprType::Scalar Scalar;
// typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalReverseIterator Base;
// public:
//
// EIGEN_STRONG_INLINE ReverseInnerIterator(const XprType& unaryOp, typename XprType::Index outer)
// : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
// {}
//
// EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
// { Base::operator--(); return *this; }
//
// EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
// EIGEN_STRONG_INLINE Scalar& valueRef() { return m_functor(Base::valueRef()); }
//
// protected:
// const ViewOp m_functor;
// };
} // end namespace internal
template<typename Derived>


@@ -94,6 +94,7 @@ class SparseMatrix
{
typedef SparseCompressedBase<SparseMatrix> Base;
using Base::convert_index;
friend class SparseVector<_Scalar,0,_Index>;
public:
using Base::isCompressed;
using Base::nonZeros;
@@ -785,30 +786,38 @@ class SparseMatrix
EIGEN_DBG_SPARSE(
s << "Nonzero entries:\n";
if(m.isCompressed())
{
for (Index i=0; i<m.nonZeros(); ++i)
s << "(" << m.m_data.value(i) << "," << m.m_data.index(i) << ") ";
}
else
{
for (Index i=0; i<m.outerSize(); ++i)
{
Index p = m.m_outerIndex[i];
Index pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];
Index k=p;
for (; k<pe; ++k)
for (; k<pe; ++k) {
s << "(" << m.m_data.value(k) << "," << m.m_data.index(k) << ") ";
for (; k<m.m_outerIndex[i+1]; ++k)
}
for (; k<m.m_outerIndex[i+1]; ++k) {
s << "(_,_) ";
}
}
}
s << std::endl;
s << std::endl;
s << "Outer pointers:\n";
for (Index i=0; i<m.outerSize(); ++i)
for (Index i=0; i<m.outerSize(); ++i) {
s << m.m_outerIndex[i] << " ";
}
s << " $" << std::endl;
if(!m.isCompressed())
{
s << "Inner non zeros:\n";
for (Index i=0; i<m.outerSize(); ++i)
for (Index i=0; i<m.outerSize(); ++i) {
s << m.m_innerNonZeros[i] << " ";
}
s << " $" << std::endl;
}
s << std::endl;


@@ -214,10 +214,11 @@ template<typename Derived> class SparseMatrixBase
if (Flags&RowMajorBit)
{
const Nested nm(m.derived());
internal::evaluator<NestedCleaned> thisEval(nm);
for (Index row=0; row<nm.outerSize(); ++row)
{
Index col = 0;
for (typename NestedCleaned::InnerIterator it(nm.derived(), row); it; ++it)
for (typename internal::evaluator<NestedCleaned>::InnerIterator it(thisEval, row); it; ++it)
{
for ( ; col<it.index(); ++col)
s << "0 ";
@@ -232,9 +233,10 @@ template<typename Derived> class SparseMatrixBase
else
{
const Nested nm(m.derived());
internal::evaluator<NestedCleaned> thisEval(nm);
if (m.cols() == 1) {
Index row = 0;
for (typename NestedCleaned::InnerIterator it(nm.derived(), 0); it; ++it)
for (typename internal::evaluator<NestedCleaned>::InnerIterator it(thisEval, 0); it; ++it)
{
for ( ; row<it.index(); ++row)
s << "0" << std::endl;


@@ -104,6 +104,11 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::assig
typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
generic_product_impl<Lhs, Rhs>::evalTo(dst,src.lhs(),src.rhs());
}
};


@@ -185,20 +185,27 @@ class Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType
EIGEN_SPARSE_PUBLIC_INTERFACE(Ref)
template<typename Derived>
inline Ref(const SparseMatrixBase<Derived>& expr)
inline Ref(const SparseMatrixBase<Derived>& expr) : m_hasCopy(false)
{
construct(expr.derived(), typename Traits::template match<Derived>::type());
}
inline Ref(const Ref& other) : Base(other) {
inline Ref(const Ref& other) : Base(other), m_hasCopy(false) {
// copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
}
template<typename OtherRef>
inline Ref(const RefBase<OtherRef>& other) {
inline Ref(const RefBase<OtherRef>& other) : m_hasCopy(false) {
construct(other.derived(), typename Traits::template match<OtherRef>::type());
}
~Ref() {
if(m_hasCopy) {
TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(m_object_bytes);
obj->~TPlainObjectType();
}
}
protected:
template<typename Expression>
@@ -208,6 +215,7 @@ class Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType
{
TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(m_object_bytes);
::new (obj) TPlainObjectType(expr);
m_hasCopy = true;
Base::construct(*obj);
}
else
@@ -221,11 +229,13 @@ class Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType
{
TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(m_object_bytes);
::new (obj) TPlainObjectType(expr);
m_hasCopy = true;
Base::construct(*obj);
}
protected:
char m_object_bytes[sizeof(TPlainObjectType)];
bool m_hasCopy;
};
@@ -293,20 +303,27 @@ class Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType
EIGEN_SPARSE_PUBLIC_INTERFACE(Ref)
template<typename Derived>
inline Ref(const SparseMatrixBase<Derived>& expr)
inline Ref(const SparseMatrixBase<Derived>& expr) : m_hasCopy(false)
{
construct(expr.derived(), typename Traits::template match<Derived>::type());
}
inline Ref(const Ref& other) : Base(other) {
inline Ref(const Ref& other) : Base(other), m_hasCopy(false) {
// copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
}
template<typename OtherRef>
inline Ref(const RefBase<OtherRef>& other) {
inline Ref(const RefBase<OtherRef>& other) : m_hasCopy(false) {
construct(other.derived(), typename Traits::template match<OtherRef>::type());
}
~Ref() {
if(m_hasCopy) {
TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(m_object_bytes);
obj->~TPlainObjectType();
}
}
protected:
template<typename Expression>
@@ -320,11 +337,13 @@ class Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType
{
TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(m_object_bytes);
::new (obj) TPlainObjectType(expr);
m_hasCopy = true;
Base::construct(*obj);
}
protected:
char m_object_bytes[sizeof(TPlainObjectType)];
bool m_hasCopy;
};
namespace internal {
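Note: the Ref<const SparseMatrix> and Ref<const SparseVector> changes above track whether the Ref had to materialize a temporary copy (m_hasCopy) and destroy it in the destructor, which is what plugs the leak. A hedged sketch of when such a copy is created, assuming Eigen 3.3:

#include <Eigen/SparseCore>

void consume(const Eigen::Ref<const Eigen::SparseMatrix<double> >& m) {
  (void)m.nonZeros();                            // read-only access through the Ref
}

int main() {
  Eigen::SparseMatrix<double> A(3, 3);                   // column-major
  Eigen::SparseMatrix<double, Eigen::RowMajor> B(3, 3);  // row-major
  A.insert(0, 0) = 1.0;
  B.insert(1, 1) = 2.0;
  A.makeCompressed();
  B.makeCompressed();

  consume(A);   // matching storage: the Ref points directly at A, no copy
  consume(B);   // mismatching storage: the Ref builds an internal copy, now released by the destructor
  return 0;
}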


@@ -19,7 +19,8 @@ namespace internal {
* The rhs is decomposed into small vertical panels which are solved through dense temporaries.
*/
template<typename Decomposition, typename Rhs, typename Dest>
void solve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest &dest)
typename enable_if<Rhs::ColsAtCompileTime!=1 && Dest::ColsAtCompileTime!=1>::type
solve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest &dest)
{
EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
typedef typename Dest::Scalar DestScalar;
@@ -40,6 +41,19 @@ void solve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs,
}
}
// Overload for vector as rhs
template<typename Decomposition, typename Rhs, typename Dest>
typename enable_if<Rhs::ColsAtCompileTime==1 || Dest::ColsAtCompileTime==1>::type
solve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest &dest)
{
typedef typename Dest::Scalar DestScalar;
Index size = rhs.rows();
Eigen::Matrix<DestScalar,Dynamic,1> rhs_dense(rhs);
Eigen::Matrix<DestScalar,Dynamic,1> dest_dense(size);
dest_dense = dec.solve(rhs_dense);
dest = dest_dense.sparseView();
}
} // end namespace internal
/** \class SparseSolverBase


@@ -56,7 +56,6 @@ struct unary_evaluator<Transpose<ArgType>, IteratorBased>
: public evaluator_base<Transpose<ArgType> >
{
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
public:
typedef Transpose<ArgType> XprType;
@@ -75,17 +74,6 @@ struct unary_evaluator<Transpose<ArgType>, IteratorBased>
Index col() const { return EvalIterator::row(); }
};
class ReverseInnerIterator : public EvalReverseIterator
{
public:
EIGEN_STRONG_INLINE ReverseInnerIterator(const unary_evaluator& unaryOp, Index outer)
: EvalReverseIterator(unaryOp.m_argImpl,outer)
{}
Index row() const { return EvalReverseIterator::col(); }
Index col() const { return EvalReverseIterator::row(); }
};
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = XprType::Flags


@@ -290,6 +290,14 @@ class SparseVector
m_data.swap(other.m_data);
}
template<int OtherOptions>
inline void swap(SparseMatrix<Scalar,OtherOptions,StorageIndex>& other)
{
eigen_assert(other.outerSize()==1);
std::swap(m_size, other.m_innerSize);
m_data.swap(other.m_data);
}
inline SparseVector& operator=(const SparseVector& other)
{
if (other.isRValue())
@@ -403,6 +411,7 @@ struct evaluator<SparseVector<_Scalar,_Options,_Index> >
: evaluator_base<SparseVector<_Scalar,_Options,_Index> >
{
typedef SparseVector<_Scalar,_Options,_Index> SparseVectorType;
typedef evaluator_base<SparseVectorType> Base;
typedef typename SparseVectorType::InnerIterator InnerIterator;
typedef typename SparseVectorType::ReverseInnerIterator ReverseInnerIterator;
@@ -410,20 +419,22 @@ struct evaluator<SparseVector<_Scalar,_Options,_Index> >
CoeffReadCost = NumTraits<_Scalar>::ReadCost,
Flags = SparseVectorType::Flags
};
evaluator() : Base() {}
explicit evaluator(const SparseVectorType &mat) : m_matrix(mat)
explicit evaluator(const SparseVectorType &mat) : m_matrix(&mat)
{
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
inline Index nonZerosEstimate() const {
return m_matrix.nonZeros();
return m_matrix->nonZeros();
}
operator SparseVectorType&() { return m_matrix.const_cast_derived(); }
operator const SparseVectorType&() const { return m_matrix; }
operator SparseVectorType&() { return m_matrix->const_cast_derived(); }
operator const SparseVectorType&() const { return *m_matrix; }
const SparseVectorType &m_matrix;
const SparseVectorType *m_matrix;
};
template< typename Dest, typename Src>


@@ -106,22 +106,22 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
#define WORK(I) \
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
c1 = pload<Packet>(C1+i+(I)*PacketSize); \
KMADD(c0, a0, b00, t0) \
KMADD(c1, a0, b01, t1) \
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
KMADD(c0, a1, b10, t0) \
KMADD(c1, a1, b11, t1) \
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
if(RK==4) KMADD(c0, a2, b20, t0) \
if(RK==4) KMADD(c1, a2, b21, t1) \
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
if(RK==4) KMADD(c0, a3, b30, t0) \
if(RK==4) KMADD(c1, a3, b31, t1) \
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
pstore(C0+i+(I)*PacketSize, c0); \
pstore(C1+i+(I)*PacketSize, c1)
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
c1 = pload<Packet>(C1+i+(I)*PacketSize); \
KMADD(c0, a0, b00, t0) \
KMADD(c1, a0, b01, t1) \
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
KMADD(c0, a1, b10, t0) \
KMADD(c1, a1, b11, t1) \
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
if(RK==4){ KMADD(c0, a2, b20, t0) }\
if(RK==4){ KMADD(c1, a2, b21, t1) }\
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
if(RK==4){ KMADD(c0, a3, b30, t0) }\
if(RK==4){ KMADD(c1, a3, b31, t1) }\
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
pstore(C0+i+(I)*PacketSize, c0); \
pstore(C1+i+(I)*PacketSize, c1)
// process rows of A' - C' with aggressive vectorization and peeling
for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
@@ -131,14 +131,15 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
prefetch((A1+i+(5)*PacketSize));
if(RK==4) prefetch((A2+i+(5)*PacketSize));
if(RK==4) prefetch((A3+i+(5)*PacketSize));
WORK(0);
WORK(1);
WORK(2);
WORK(3);
WORK(4);
WORK(5);
WORK(6);
WORK(7);
WORK(0);
WORK(1);
WORK(2);
WORK(3);
WORK(4);
WORK(5);
WORK(6);
WORK(7);
}
// process the remaining rows with vectorization only
for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
@@ -203,16 +204,16 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
}
#define WORK(I) \
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
KMADD(c0, a0, b00, t0) \
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
KMADD(c0, a1, b10, t0) \
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
if(RK==4) KMADD(c0, a2, b20, t0) \
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
if(RK==4) KMADD(c0, a3, b30, t0) \
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
pstore(C0+i+(I)*PacketSize, c0);
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
KMADD(c0, a0, b00, t0) \
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
KMADD(c0, a1, b10, t0) \
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
if(RK==4){ KMADD(c0, a2, b20, t0) }\
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
if(RK==4){ KMADD(c0, a3, b30, t0) }\
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
pstore(C0+i+(I)*PacketSize, c0);
// aggressive vectorization and peeling
for(Index i=0; i<actual_b_end1; i+=PacketSize*8)


@@ -269,44 +269,6 @@ const CwiseBinaryOp<internal::scalar_difference_op<T,Scalar>,Constant<T>,Derived
operator/(const T& s,const StorageBaseType& a);
#endif
/** \returns an expression of the coefficient-wise && operator of *this and \a other
*
* \warning this operator is for expression of bool only.
*
* Example: \include Cwise_boolean_and.cpp
* Output: \verbinclude Cwise_boolean_and.out
*
* \sa operator||(), select()
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
{
EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
}
/** \returns an expression of the coefficient-wise || operator of *this and \a other
*
* \warning this operator is for expression of bool only.
*
* Example: \include Cwise_boolean_or.cpp
* Output: \verbinclude Cwise_boolean_or.out
*
* \sa operator&&(), select()
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
{
EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
}
/** \returns an expression of the coefficient-wise ^ operator of *this and \a other
*
* \warning this operator is for expression of bool only.


@@ -75,3 +75,41 @@ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)
template<typename T>
const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;
#endif
/** \returns an expression of the coefficient-wise boolean \b and operator of \c *this and \a other
*
* \warning this operator is for expression of bool only.
*
* Example: \include Cwise_boolean_and.cpp
* Output: \verbinclude Cwise_boolean_and.out
*
* \sa operator||(), select()
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
{
EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
}
/** \returns an expression of the coefficient-wise boolean \b or operator of \c *this and \a other
*
* \warning this operator is for expression of bool only.
*
* Example: \include Cwise_boolean_or.cpp
* Output: \verbinclude Cwise_boolean_or.out
*
* \sa operator&&(), select()
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
{
EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
}
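Note: these doc blocks describe the coefficient-wise && and || operators, which are restricted to bool-valued expressions. A tiny sketch combining them with comparisons, assuming Eigen 3.3:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(5, 0.0, 1.0);

  // Comparisons yield bool-valued array expressions; && and || combine them coefficient-wise.
  Eigen::Array<bool, Eigen::Dynamic, 1> inBand  = (a > 0.2) && (a < 0.8);
  Eigen::Array<bool, Eigen::Dynamic, 1> outside = (a <= 0.2) || (a >= 0.8);

  std::cout << inBand.count() << " " << outside.count() << std::endl;  // prints "3 2"
  return 0;
}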


@@ -11,5 +11,11 @@ nvcc tensor_benchmarks_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBU
We also provide a version of the generic GPU tensor benchmarks that uses half floats (aka fp16) instead of regular floats. To compile these benchmarks, simply call the command line below. You'll need a recent GPU that supports compute capability 5.3 or higher to run them and nvcc 7.5 or higher to compile the code.
nvcc tensor_benchmarks_fp16_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -use_fast_math -ftz=true -arch compute_53 -o benchmarks_fp16_gpu
last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call
last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call
g++ contraction_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu
To compile the benchmark for SYCL using ComputeCpp, you currently need two passes (only for translation units containing device code):
1. The device compilation pass that generates the device code (SYCL kernels and referenced device functions) and glue code needed by the host compiler to reference the device code from host code.
{ComputeCpp_ROOT}/bin/compute++ -I ../../ -I {ComputeCpp_ROOT}/include/ -std=c++11 -mllvm -inline-threshold=1000 -Wno-ignored-attributes -sycl -intelspirmetadata -emit-llvm -no-serial-memop -sycl-compress-name -DBUILD_PLATFORM_SPIR -DNDBUG -O3 -c tensor_benchmarks_sycl.cc
2. The host compilation pass that generates the final host binary.
clang++-3.7 -include tensor_benchmarks_sycl.sycl benchmark_main.cc tensor_benchmarks_sycl.cc -pthread -I ../../ -I {ComputeCpp_ROOT}/include/ -L {ComputeCpp_ROOT}/lib/ -lComputeCpp -lOpenCL -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -o tensor_benchmark_sycl


@@ -0,0 +1,37 @@
#define EIGEN_USE_SYCL
#include <SYCL/sycl.hpp>
#include <iostream>
#include "tensor_benchmarks.h"
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
// Simple functions
template <typename device_selector>
cl::sycl::queue sycl_queue() {
return cl::sycl::queue(device_selector(), [=](cl::sycl::exception_list l) {
for (const auto& e : l) {
try {
std::rethrow_exception(e);
} catch (cl::sycl::exception e) {
std::cout << e.what() << std::endl;
}
}
});
}
#define BM_FuncGPU(FUNC) \
static void BM_##FUNC(int iters, int N) { \
StopBenchmarkTiming(); \
cl::sycl::queue q = sycl_queue<cl::sycl::gpu_selector>(); \
Eigen::SyclDevice device(q); \
BenchmarkSuite<Eigen::SyclDevice, float> suite(device, N); \
suite.FUNC(iters); \
} \
BENCHMARK_RANGE(BM_##FUNC, 10, 5000);
BM_FuncGPU(broadcasting);
BM_FuncGPU(coeffWiseOp);


@@ -1,28 +1,21 @@
# -*- cmake -*-
#
# Eigen3Config.cmake(.in)
# This file exports the Eigen3::Eigen CMake target which should be passed to the
# target_link_libraries command.
# Use the following variables to compile and link against Eigen:
# EIGEN3_FOUND - True if Eigen was found on your system
# EIGEN3_USE_FILE - The file making Eigen usable
# EIGEN3_DEFINITIONS - Definitions needed to build with Eigen
# EIGEN3_INCLUDE_DIR - Directory where signature_of_eigen3_matrix_library can be found
# EIGEN3_INCLUDE_DIRS - List of directories of Eigen and its dependencies
# EIGEN3_ROOT_DIR - The base directory of Eigen
# EIGEN3_VERSION_STRING - A human-readable string containing the version
# EIGEN3_VERSION_MAJOR - The major version of Eigen
# EIGEN3_VERSION_MINOR - The minor version of Eigen
# EIGEN3_VERSION_PATCH - The patch version of Eigen
@PACKAGE_INIT@
set ( EIGEN3_FOUND 1 )
set ( EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake" )
include ("${CMAKE_CURRENT_LIST_DIR}/Eigen3Targets.cmake")
set ( EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@" )
set ( EIGEN3_INCLUDE_DIR "@EIGEN_INCLUDE_DIR@" )
set ( EIGEN3_INCLUDE_DIRS "@EIGEN_INCLUDE_DIRS@" )
set ( EIGEN3_ROOT_DIR "@EIGEN_ROOT_DIR@" )
# Legacy variables, do *not* use. May be removed in the future.
set ( EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@" )
set ( EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@" )
set ( EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@" )
set ( EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@" )
set (EIGEN3_FOUND 1)
set (EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake")
set (EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@")
set (EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@")
set (EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@")
set (EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@")
set (EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@")
set (EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@")
set (EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@")
set (EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@")


@@ -0,0 +1,30 @@
# -*- cmake -*-
#
# Eigen3Config.cmake(.in)
# Use the following variables to compile and link against Eigen:
# EIGEN3_FOUND - True if Eigen was found on your system
# EIGEN3_USE_FILE - The file making Eigen usable
# EIGEN3_DEFINITIONS - Definitions needed to build with Eigen
# EIGEN3_INCLUDE_DIR - Directory where signature_of_eigen3_matrix_library can be found
# EIGEN3_INCLUDE_DIRS - List of directories of Eigen and its dependencies
# EIGEN3_ROOT_DIR - The base directory of Eigen
# EIGEN3_VERSION_STRING - A human-readable string containing the version
# EIGEN3_VERSION_MAJOR - The major version of Eigen
# EIGEN3_VERSION_MINOR - The minor version of Eigen
# EIGEN3_VERSION_PATCH - The patch version of Eigen
@PACKAGE_INIT@
set ( EIGEN3_FOUND 1 )
set ( EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake" )
set ( EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@" )
set ( EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@" )
set ( EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@" )
set ( EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@" )
set ( EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@" )
set ( EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@" )
set ( EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@" )
set ( EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@" )


@@ -1,23 +1,23 @@
macro(ei_add_property prop value)
get_property(previous GLOBAL PROPERTY ${prop})
get_property(previous GLOBAL PROPERTY ${prop})
if ((NOT previous) OR (previous STREQUAL ""))
set_property(GLOBAL PROPERTY ${prop} "${value}")
else()
set_property(GLOBAL PROPERTY ${prop} "${previous} ${value}")
endif()
endmacro(ei_add_property)
#internal. See documentation of ei_add_test for details.
macro(ei_add_test_internal testname testname_with_suffix)
set(targetname ${testname_with_suffix})
if(EIGEN_ADD_TEST_FILENAME_EXTENSION)
set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION})
else()
set(filename ${testname}.cpp)
endif()
if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu)
if(EIGEN_TEST_CUDA_CLANG)
set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX)
@@ -42,7 +42,7 @@ macro(ei_add_test_internal testname testname_with_suffix)
else()
add_executable(${targetname} ${filename})
endif()
if (targetname MATCHES "^eigen2_")
add_dependencies(eigen2_buildtests ${targetname})
else()
@@ -56,20 +56,20 @@ macro(ei_add_test_internal testname testname_with_suffix)
ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1")
endif(EIGEN_DEBUG_ASSERTS)
endif(EIGEN_NO_ASSERTION_CHECKING)
ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}")
ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_FUNC=${testname}")
if(MSVC AND NOT EIGEN_SPLIT_LARGE_TESTS)
if(MSVC)
ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj")
endif()
# let the user pass flags.
if(${ARGC} GREATER 2)
ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}")
endif(${ARGC} GREATER 2)
if(EIGEN_TEST_CUSTOM_CXX_FLAGS)
ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}")
endif()
@@ -95,12 +95,12 @@ macro(ei_add_test_internal testname testname_with_suffix)
# notice: no double quotes around ${libs_to_link} here. It may be a list.
target_link_libraries(${targetname} ${libs_to_link})
endif()
endif()
add_test(${testname_with_suffix} "${targetname}")
# Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT
get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT)
if ((current_subproject) AND (NOT (current_subproject STREQUAL "")))
set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}")
add_dependencies("Build${current_subproject}" ${targetname})
@@ -109,6 +109,103 @@ macro(ei_add_test_internal testname testname_with_suffix)
endmacro(ei_add_test_internal)
# SYCL
macro(ei_add_test_internal_sycl testname testname_with_suffix)
include_directories( SYSTEM ${COMPUTECPP_PACKAGE_ROOT_DIR}/include)
set(targetname ${testname_with_suffix})
if(EIGEN_ADD_TEST_FILENAME_EXTENSION)
set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION})
else()
set(filename ${testname}.cpp)
endif()
set( include_file ${CMAKE_CURRENT_BINARY_DIR}/inc_${filename})
set( bc_file ${CMAKE_CURRENT_BINARY_DIR}/${filename})
set( host_file ${CMAKE_CURRENT_SOURCE_DIR}/${filename})
ADD_CUSTOM_COMMAND(
OUTPUT ${include_file}
COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${host_file}\\\"" > ${include_file}
COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${bc_file}.sycl\\\"" >> ${include_file}
DEPENDS ${filename} ${bc_file}.sycl
COMMENT "Building ComputeCpp integration header file ${include_file}"
)
# Add a custom target for the generated integration header
add_custom_target(${testname}_integration_header_sycl DEPENDS ${include_file})
add_executable(${targetname} ${include_file})
add_dependencies(${targetname} ${testname}_integration_header_sycl)
add_sycl_to_target(${targetname} ${filename} ${CMAKE_CURRENT_BINARY_DIR})
if (targetname MATCHES "^eigen2_")
add_dependencies(eigen2_buildtests ${targetname})
else()
add_dependencies(buildtests ${targetname})
endif()
if(EIGEN_NO_ASSERTION_CHECKING)
ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1")
else(EIGEN_NO_ASSERTION_CHECKING)
if(EIGEN_DEBUG_ASSERTS)
ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1")
endif(EIGEN_DEBUG_ASSERTS)
endif(EIGEN_NO_ASSERTION_CHECKING)
ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}")
ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_FUNC=${testname}")
if(MSVC AND NOT EIGEN_SPLIT_LARGE_TESTS)
ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj")
endif()
# let the user pass flags.
if(${ARGC} GREATER 2)
ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}")
endif(${ARGC} GREATER 2)
if(EIGEN_TEST_CUSTOM_CXX_FLAGS)
ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}")
endif()
if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
target_link_libraries(${targetname} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO})
endif()
if(EXTERNAL_LIBS)
target_link_libraries(${targetname} ${EXTERNAL_LIBS})
endif()
if(EIGEN_TEST_CUSTOM_LINKER_FLAGS)
target_link_libraries(${targetname} ${EIGEN_TEST_CUSTOM_LINKER_FLAGS})
endif()
if(${ARGC} GREATER 3)
set(libs_to_link ${ARGV3})
# it could be that some cmake module provides a bad library string " " (just spaces),
# and that severely breaks target_link_libraries ("can't link to -l-lstdc++" errors).
# so we check for strings containing only spaces.
string(STRIP "${libs_to_link}" libs_to_link_stripped)
string(LENGTH "${libs_to_link_stripped}" libs_to_link_stripped_length)
if(${libs_to_link_stripped_length} GREATER 0)
# notice: no double quotes around ${libs_to_link} here. It may be a list.
target_link_libraries(${targetname} ${libs_to_link})
endif()
endif()
add_test(${testname_with_suffix} "${targetname}")
# Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT
get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT)
if ((current_subproject) AND (NOT (current_subproject STREQUAL "")))
set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}")
add_dependencies("Build${current_subproject}" ${targetname})
set_property(TEST ${testname_with_suffix} PROPERTY LABELS "${current_subproject}")
endif()
endmacro(ei_add_test_internal_sycl)
# Macro to add a test
#
# the unique mandatory parameter testname must correspond to a file
@@ -161,7 +258,7 @@ macro(ei_add_test testname)
else()
set(filename ${testname}.cpp)
endif()
file(READ "${filename}" test_source)
set(parts 0)
string(REGEX MATCHALL "CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+|EIGEN_SUFFIXES(;[0-9]+)+"
@@ -185,6 +282,39 @@ macro(ei_add_test testname)
endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
endmacro(ei_add_test)
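# Illustrative invocations (hypothetical test names; the real calls live in the test/ CMakeLists.txt files):
#   ei_add_test(basicstuff)                                 # builds basicstuff.cpp and registers the "basicstuff" test
#   ei_add_test(some_test "-DSOME_FLAG=1" "${SOME_LIBS}")   # optional ARGV1 = extra compile flags, ARGV2 = libraries to link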
macro(ei_add_test_sycl testname)
get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST)
set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}${testname}\n")
set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}")
if(EIGEN_ADD_TEST_FILENAME_EXTENSION)
set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION})
else()
set(filename ${testname}.cpp)
endif()
file(READ "${filename}" test_source)
set(parts 0)
string(REGEX MATCHALL "CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+|EIGEN_SUFFIXES(;[0-9]+)+"
occurences "${test_source}")
string(REGEX REPLACE "CALL_SUBTEST_|EIGEN_TEST_PART_|EIGEN_SUFFIXES" "" suffixes "${occurences}")
list(REMOVE_DUPLICATES suffixes)
if(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
add_custom_target(${testname})
foreach(suffix ${suffixes})
ei_add_test_internal_sycl(${testname} ${testname}_${suffix}
"${ARGV1} -DEIGEN_TEST_PART_${suffix}=1" "${ARGV2}")
add_dependencies(${testname} ${testname}_${suffix})
endforeach(suffix)
else(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
set(symbols_to_enable_all_parts "")
foreach(suffix ${suffixes})
set(symbols_to_enable_all_parts
"${symbols_to_enable_all_parts} -DEIGEN_TEST_PART_${suffix}=1")
endforeach(suffix)
ei_add_test_internal_sycl(${testname} ${testname} "${ARGV1} ${symbols_to_enable_all_parts}" "${ARGV2}")
endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
endmacro(ei_add_test_sycl)
# adds a failtest, i.e. a test that succeed if the program fails to compile
# note that the test runner for these is CMake itself, when passed -DEIGEN_FAILTEST=ON
@@ -249,7 +379,7 @@ macro(ei_testing_print_summary)
elseif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
message(STATUS "Explicit vectorization disabled (alignment kept enabled)")
else()
message(STATUS "Maximal matrix/vector size: ${EIGEN_TEST_MAX_SIZE}")
if(EIGEN_TEST_SSE2)
@@ -288,12 +418,18 @@ macro(ei_testing_print_summary)
message(STATUS "AVX: Using architecture defaults")
endif()
if(EIGEN_TEST_FMA)
message(STATUS "FMA: ON")
else()
message(STATUS "FMA: Using architecture defaults")
endif()
if(EIGEN_TEST_AVX512)
message(STATUS "AVX512: ON")
else()
message(STATUS "AVX512: Using architecture defaults")
endif()
if(EIGEN_TEST_ALTIVEC)
message(STATUS "Altivec: ON")
else()
@@ -317,19 +453,24 @@ macro(ei_testing_print_summary)
else()
message(STATUS "ARMv8 NEON: Using architecture defaults")
endif()
if(EIGEN_TEST_ZVECTOR)
message(STATUS "S390X ZVECTOR: ON")
else()
message(STATUS "S390X ZVECTOR: Using architecture defaults")
endif()
if(EIGEN_TEST_CXX11)
message(STATUS "C++11: ON")
else()
message(STATUS "C++11: OFF")
endif()
if(EIGEN_TEST_SYCL)
message(STATUS "SYCL: ON")
else()
message(STATUS "SYCL: OFF")
endif()
if(EIGEN_TEST_CUDA)
if(EIGEN_TEST_CUDA_CLANG)
message(STATUS "CUDA: ON (using clang)")
@@ -364,7 +505,7 @@ macro(ei_init_testing)
set_property(GLOBAL PROPERTY EIGEN_FAILTEST_FAILURE_COUNT "0")
set_property(GLOBAL PROPERTY EIGEN_FAILTEST_COUNT "0")
# uncomment anytime you change the ei_get_compilerver_from_cxx_version_string macro
# ei_test_get_compilerver_from_cxx_version_string()
endmacro(ei_init_testing)
@@ -373,22 +514,22 @@ macro(ei_set_sitename)
# if the sitename is not yet set, try to set it
if(NOT ${SITE} OR ${SITE} STREQUAL "")
set(eigen_computername $ENV{COMPUTERNAME})
set(eigen_hostname $ENV{HOSTNAME})
if(eigen_hostname)
set(SITE ${eigen_hostname})
elseif(eigen_computername)
set(SITE ${eigen_computername})
endif()
endif()
# in case it is already set, enforce lower case
if(SITE)
string(TOLOWER ${SITE} SITE)
endif()
endmacro(ei_set_sitename)
macro(ei_get_compilerver VAR)
if(MSVC)
# on windows system, we use a modified CMake script
include(EigenDetermineVSServicePack)
EigenDetermineVSServicePack( my_service_pack )
@@ -400,20 +541,20 @@ macro(ei_get_compilerver VAR)
else()
# on all other system we rely on ${CMAKE_CXX_COMPILER}
# supporting a "--version" or "/version" flag
if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} EQUAL "Intel")
set(EIGEN_CXX_FLAG_VERSION "/version")
else()
set(EIGEN_CXX_FLAG_VERSION "--version")
endif()
execute_process(COMMAND ${CMAKE_CXX_COMPILER} ${EIGEN_CXX_FLAG_VERSION}
OUTPUT_VARIABLE eigen_cxx_compiler_version_string OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REGEX REPLACE "[\n\r].*" "" eigen_cxx_compiler_version_string ${eigen_cxx_compiler_version_string})
ei_get_compilerver_from_cxx_version_string("${eigen_cxx_compiler_version_string}" CNAME CVER)
set(${VAR} "${CNAME}-${CVER}")
endif()
endmacro(ei_get_compilerver)
@@ -422,13 +563,13 @@ endmacro(ei_get_compilerver)
# the testing macro call in ei_init_testing() of the EigenTesting.cmake file.
# See also the ei_test_get_compilerver_from_cxx_version_string macro at the end of the file
macro(ei_get_compilerver_from_cxx_version_string VERSTRING CNAME CVER)
# extract possible compiler names
string(REGEX MATCH "g\\+\\+" ei_has_gpp ${VERSTRING})
string(REGEX MATCH "llvm|LLVM" ei_has_llvm ${VERSTRING})
string(REGEX MATCH "gcc|GCC" ei_has_gcc ${VERSTRING})
string(REGEX MATCH "icpc|ICC" ei_has_icpc ${VERSTRING})
string(REGEX MATCH "clang|CLANG" ei_has_clang ${VERSTRING})
# combine them
if((ei_has_llvm) AND (ei_has_gpp OR ei_has_gcc))
set(${CNAME} "llvm-g++")
@@ -443,7 +584,7 @@ macro(ei_get_compilerver_from_cxx_version_string VERSTRING CNAME CVER)
else()
set(${CNAME} "_")
endif()
# extract possible version numbers
# first try to extract 3 isolated numbers:
string(REGEX MATCH " [0-9]+\\.[0-9]+\\.[0-9]+" eicver ${VERSTRING})
@@ -461,9 +602,9 @@ macro(ei_get_compilerver_from_cxx_version_string VERSTRING CNAME CVER)
endif()
endif()
endif()
string(REGEX REPLACE ".(.*)" "\\1" ${CVER} ${eicver})
endmacro(ei_get_compilerver_from_cxx_version_string)
macro(ei_get_cxxflags VAR)
@@ -492,30 +633,30 @@ macro(ei_get_cxxflags VAR)
elseif(EIGEN_TEST_SSE3)
set(${VAR} SSE3)
elseif(EIGEN_TEST_SSE2 OR IS_64BIT_ENV)
set(${VAR} SSE2)
endif()
if(EIGEN_TEST_OPENMP)
if (${VAR} STREQUAL "")
set(${VAR} OMP)
else()
set(${VAR} ${${VAR}}-OMP)
endif()
endif()
if(EIGEN_DEFAULT_TO_ROW_MAJOR)
if (${VAR} STREQUAL "")
set(${VAR} ROW)
else()
set(${VAR} ${${VAR}}-ROWMAJ)
endif()
endif()
endmacro(ei_get_cxxflags)
macro(ei_set_build_string)
ei_get_compilerver(LOCAL_COMPILER_VERSION)
ei_get_cxxflags(LOCAL_COMPILER_FLAGS)
include(EigenDetermineOSVersion)
DetermineOSVersion(OS_VERSION)
@@ -531,11 +672,11 @@ macro(ei_set_build_string)
else()
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-64bit)
endif()
if(EIGEN_TEST_CXX11)
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-cxx11)
endif()
if(EIGEN_BUILD_STRING_SUFFIX)
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX})
endif()

cmake/FindComputeCpp.cmake (new file, 245 lines)
View File

@@ -0,0 +1,245 @@
#.rst:
# FindComputeCpp
#---------------
#
# Copyright 2016 Codeplay Software Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use these files except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#########################
# FindComputeCpp.cmake
#########################
#
# Tools for finding and building with ComputeCpp.
#
# User must define COMPUTECPP_PACKAGE_ROOT_DIR pointing to the ComputeCpp
# installation.
#
# Latest version of this file can be found at:
# https://github.com/codeplaysoftware/computecpp-sdk
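# A typical configure step might therefore look like the following (the install
# prefix below is only an illustrative assumption):
#   cmake <path-to-source> -DCOMPUTECPP_PACKAGE_ROOT_DIR=/opt/ComputeCpp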
# Require CMake version 3.2.2 or higher
cmake_minimum_required(VERSION 3.2.2)
# Check that a supported host compiler can be found
if(CMAKE_COMPILER_IS_GNUCXX)
# Require at least gcc 4.8
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
message(FATAL_ERROR
"host compiler - Not found! (gcc version must be at least 4.8)")
# Require the GCC dual ABI to be disabled for 5.1 or higher
elseif (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.1)
set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True")
message(STATUS
"host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION} (note pre 5.1 gcc ABI enabled)")
else()
message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
# Require at least clang 3.6
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6)
message(FATAL_ERROR
"host compiler - Not found! (clang version must be at least 3.6)")
else()
message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}")
endif()
else()
message(WARNING
"host compiler - Not found! (ComputeCpp supports GCC and Clang, see readme)")
endif()
set(COMPUTECPP_64_BIT_DEFAULT ON)
option(COMPUTECPP_64_BIT_CODE "Compile device code in 64 bit mode"
${COMPUTECPP_64_BIT_DEFAULT})
mark_as_advanced(COMPUTECPP_64_BIT_CODE)
# Find OpenCL package
find_package(OpenCL REQUIRED)
# Find ComputeCpp package
if(NOT COMPUTECPP_PACKAGE_ROOT_DIR)
message(FATAL_ERROR
"ComputeCpp package - Not found! (please set COMPUTECPP_PACKAGE_ROOT_DIR")
else()
message(STATUS "ComputeCpp package - Found")
endif()
option(COMPUTECPP_PACKAGE_ROOT_DIR "Path to the ComputeCpp Package")
# Obtain the path to compute++
find_program(COMPUTECPP_DEVICE_COMPILER compute++ PATHS
${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin)
if (EXISTS ${COMPUTECPP_DEVICE_COMPILER})
mark_as_advanced(COMPUTECPP_DEVICE_COMPILER)
message(STATUS "compute++ - Found")
else()
message(FATAL_ERROR "compute++ - Not found! (${COMPUTECPP_DEVICE_COMPILER})")
endif()
# Obtain the path to computecpp_info
find_program(COMPUTECPP_INFO_TOOL computecpp_info PATHS
${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin)
if (EXISTS ${COMPUTECPP_INFO_TOOL})
mark_as_advanced(${COMPUTECPP_INFO_TOOL})
message(STATUS "computecpp_info - Found")
else()
message(FATAL_ERROR "computecpp_info - Not found! (${COMPUTECPP_INFO_TOOL})")
endif()
# Obtain the path to the ComputeCpp runtime library
find_library(COMPUTECPP_RUNTIME_LIBRARY ComputeCpp PATHS ${COMPUTECPP_PACKAGE_ROOT_DIR}
HINTS ${COMPUTECPP_PACKAGE_ROOT_DIR}/lib PATH_SUFFIXES lib
DOC "ComputeCpp Runtime Library" NO_DEFAULT_PATH)
if (EXISTS ${COMPUTECPP_RUNTIME_LIBRARY})
mark_as_advanced(COMPUTECPP_RUNTIME_LIBRARY)
message(STATUS "libComputeCpp.so - Found")
else()
message(FATAL_ERROR "libComputeCpp.so - Not found!")
endif()
# Obtain the ComputeCpp include directory
set(COMPUTECPP_INCLUDE_DIRECTORY ${COMPUTECPP_PACKAGE_ROOT_DIR}/include/)
if (NOT EXISTS ${COMPUTECPP_INCLUDE_DIRECTORY})
message(FATAL_ERROR "ComputeCpp includes - Not found!")
else()
message(STATUS "ComputeCpp includes - Found")
endif()
# Obtain the package version
execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-version"
OUTPUT_VARIABLE COMPUTECPP_PACKAGE_VERSION
RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
message(FATAL_ERROR "Package version - Error obtaining version!")
else()
mark_as_advanced(COMPUTECPP_PACKAGE_VERSION)
message(STATUS "Package version - ${COMPUTECPP_PACKAGE_VERSION}")
endif()
# Obtain the device compiler flags
execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-device-compiler-flags"
OUTPUT_VARIABLE COMPUTECPP_DEVICE_COMPILER_FLAGS
RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
message(FATAL_ERROR "compute++ flags - Error obtaining compute++ flags!")
else()
mark_as_advanced(COMPUTECPP_COMPILER_FLAGS)
message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}")
endif()
set(COMPUTECPP_DEVICE_COMPILER_FLAGS ${COMPUTECPP_DEVICE_COMPILER_FLAGS} -sycl-compress-name -no-serial-memop -DEIGEN_NO_ASSERTION_CHECKING=1)
# Check if the platform is supported
execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported"
OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED
RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
message(FATAL_ERROR "platform - Error checking platform support!")
else()
mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED)
if (COMPUTECPP_PLATFORM_IS_SUPPORTED)
message(STATUS "platform - your system can support ComputeCpp")
else()
message(STATUS "platform - your system CANNOT support ComputeCpp")
endif()
endif()
####################
# __build_sycl
####################
#
# Adds a custom target for running compute++ and adding a dependency for the
# resulting integration header.
#
# targetName : Name of the target.
# sourceFile : Source file to be compiled.
# binaryDir : Intermediate directory to output the integration header.
#
function(__build_spir targetName sourceFile binaryDir)
# Retrieve source file name.
get_filename_component(sourceFileName ${sourceFile} NAME)
# Set the path to the Sycl file.
set(outputSyclFile ${binaryDir}/${sourceFileName}.sycl)
# Add any user-defined include to the device compiler
get_property(includeDirectories DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY
INCLUDE_DIRECTORIES)
set(device_compiler_includes "")
foreach(directory ${includeDirectories})
set(device_compiler_includes "-I${directory}" ${device_compiler_includes})
endforeach()
if (CMAKE_INCLUDE_PATH)
foreach(directory ${CMAKE_INCLUDE_PATH})
set(device_compiler_includes "-I${directory}"
${device_compiler_includes})
endforeach()
endif()
# Convert argument list format
separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS)
# Add custom command for running compute++
add_custom_command(
OUTPUT ${outputSyclFile}
COMMAND ${COMPUTECPP_DEVICE_COMPILER}
${COMPUTECPP_DEVICE_COMPILER_FLAGS}
-isystem ${COMPUTECPP_INCLUDE_DIRECTORY}
${COMPUTECPP_PLATFORM_SPECIFIC_ARGS}
${device_compiler_includes}
-o ${outputSyclFile}
-c ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}
DEPENDS ${sourceFile}
WORKING_DIRECTORY ${binaryDir}
COMMENT "Building ComputeCpp integration header file ${outputSyclFile}")
# Add a custom target for the generated integration header
add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile})
# Add a dependency on the integration header
add_dependencies(${targetName} ${targetName}_integration_header)
# Set the host compiler C++ standard to C++11
set_property(TARGET ${targetName} PROPERTY CXX_STANDARD 11)
# Disable GCC dual ABI on GCC 5.1 and higher
if(COMPUTECPP_DISABLE_GCC_DUAL_ABI)
set_property(TARGET ${targetName} APPEND PROPERTY COMPILE_DEFINITIONS
"_GLIBCXX_USE_CXX11_ABI=0")
endif()
endfunction()
#######################
# add_sycl_to_target
#######################
#
# Adds a SYCL compilation custom command associated with an existing
# target and sets a dependency on that new command.
#
# targetName : Name of the target to add SYCL to.
# sourceFile : Source file to be compiled for SYCL.
# binaryDir : Intermediate directory to output the integration header.
#
function(add_sycl_to_target targetName sourceFile binaryDir)
# Add custom target to run compute++ and generate the integration header
__build_spir(${targetName} ${sourceFile} ${binaryDir})
# Link with the ComputeCpp runtime library
target_link_libraries(${targetName} PUBLIC ${COMPUTECPP_RUNTIME_LIBRARY}
PUBLIC ${OpenCL_LIBRARIES})
endfunction(add_sycl_to_target)
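As a rough usage sketch (the target and file names are invented; the parameter order follows the documentation above):
add_executable(sycl_demo sycl_demo.cpp)
add_sycl_to_target(sycl_demo sycl_demo.cpp ${CMAKE_CURRENT_BINARY_DIR})
With these two calls, compute++ emits ${CMAKE_CURRENT_BINARY_DIR}/sycl_demo.cpp.sycl and the host target gains a dependency on that integration header.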

View File

@@ -26,6 +26,7 @@ namespace Eigen {
- \subpage TopicPitfalls
- \subpage TopicTemplateKeyword
- \subpage UserManual_UnderstandingEigen
- \subpage TopicCMakeGuide
*/
/** \page UserManual_UnderstandingEigen Understanding Eigen

View File

@@ -97,31 +97,35 @@ run time. However, these assertions do cost time and can thus be turned off.
\section TopicPreprocessorDirectivesPerformance Alignment, vectorization and performance tweaking
- \b EIGEN_MALLOC_ALREADY_ALIGNED - Can be set to 0 or 1 to tell whether default system \c malloc already
- \b \c EIGEN_MALLOC_ALREADY_ALIGNED - Can be set to 0 or 1 to tell whether default system \c malloc already
returns aligned buffers. If not defined, this information is automatically deduced from the compiler
and system preprocessor tokens.
- \b EIGEN_DONT_ALIGN - disables alignment completely. %Eigen will not try to align its objects and does not
expect that any objects passed to it are aligned. This will turn off vectorization. Not defined by default.
- \b EIGEN_DONT_ALIGN_STATICALLY - disables alignment of arrays on the stack. Not defined by default, unless
\c EIGEN_DONT_ALIGN is defined.
- \b EIGEN_DONT_PARALLELIZE - if defined, this disables multi-threading. This is only relevant if you enabled OpenMP.
- \b \c EIGEN_MAX_ALIGN_BYTES - Must be a power of two, or 0. Defines an upper bound on the memory boundary in bytes on which dynamically and statically allocated data may be aligned by %Eigen. If not defined, a default value is automatically computed based on architecture, compiler, and OS.
This option is typically used to enforce binary compatibility between code/libraries compiled with different SIMD options. For instance, one may compile AVX code and enforce ABI compatibility with existing SSE code by defining \c EIGEN_MAX_ALIGN_BYTES=16. The other way round, since by default AVX implies 32-byte alignment for best performance, one can compile SSE code to be ABI compatible with AVX code by defining \c EIGEN_MAX_ALIGN_BYTES=32.
- \b \c EIGEN_MAX_STATIC_ALIGN_BYTES - Same as \c EIGEN_MAX_ALIGN_BYTES but for statically allocated data only. By default, if only \c EIGEN_MAX_ALIGN_BYTES is defined, then \c EIGEN_MAX_STATIC_ALIGN_BYTES == \c EIGEN_MAX_ALIGN_BYTES, otherwise a default value is automatically computed based on architecture, compiler, and OS (can be smaller than the default value of EIGEN_MAX_ALIGN_BYTES on architectures that do not support stack alignment).
Let us emphasize that \c EIGEN_MAX_*_ALIGN_BYTES define only a desirable upper bound. In practice, data is aligned to the largest power-of-two common divisor of \c EIGEN_MAX_STATIC_ALIGN_BYTES and the size of the data, such that memory is not wasted (see the code example after this list).
- \b \c EIGEN_DONT_PARALLELIZE - if defined, this disables multi-threading. This is only relevant if you enabled OpenMP.
See \ref TopicMultiThreading for details.
- \b EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless
alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN.
- \b EIGEN_UNALIGNED_VECTORIZE - disables/enables vectorization with unaligned stores. Default is 1 (enabled).
- \b \c EIGEN_UNALIGNED_VECTORIZE - disables/enables vectorization with unaligned stores. Default is 1 (enabled).
If set to 0 (disabled), then expressions for which the destination cannot be aligned are not vectorized (e.g., unaligned
small fixed-size vectors or matrices).
- \b EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently
- \b \c EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently
enables the SSE vectorization of sin() and cos(), and speeds up sqrt() for single precision. Defined to 1 by default.
Define it to 0 to disable.
- \b EIGEN_UNROLLING_LIMIT - defines the size of a loop to enable meta unrolling. Set it to zero to disable
- \b \c EIGEN_UNROLLING_LIMIT - defines the size of a loop to enable meta unrolling. Set it to zero to disable
unrolling. The size of a loop here is expressed in %Eigen's own notion of "number of FLOPS", it does not
correspond to the number of iterations or the number of instructions. The default value is 100.
- \b EIGEN_STACK_ALLOCATION_LIMIT - defines the maximum bytes for a buffer to be allocated on the stack. For internal
- \b \c EIGEN_STACK_ALLOCATION_LIMIT - defines the maximum bytes for a buffer to be allocated on the stack. For internal
temporary buffers, dynamic memory allocation is employed as a fall back. For fixed-size matrices or arrays, exceeding
this threshold raises a compile time assertion. Use 0 to set no limit. Default is 128 KB.
- \c EIGEN_DONT_ALIGN - Deprecated, it is a synonym for \c EIGEN_MAX_ALIGN_BYTES=0. It disables alignment completely. %Eigen will not try to align its objects and does not expect that any objects passed to it are aligned. This will turn off vectorization if \b EIGEN_UNALIGNED_VECTORIZE=1. Not defined by default.
- \c EIGEN_DONT_ALIGN_STATICALLY - Deprecated, it is a synonym for \c EIGEN_MAX_STATIC_ALIGN_BYTES=0. It disables alignment of arrays on the stack. Not defined by default, unless \c EIGEN_DONT_ALIGN is defined.
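For instance, to keep a translation unit ABI compatible with SSE-only code, one might pin the bound as in the following sketch (the value 16 is just an example; the macro must be defined before any %Eigen header is included):
\code{.cpp}
// Cap Eigen's alignment at 16 bytes for this translation unit.
#define EIGEN_MAX_ALIGN_BYTES 16
#include <Eigen/Dense>

int main()
{
  Eigen::Matrix4f m = Eigen::Matrix4f::Identity();
  return m(0,0) == 1.0f ? 0 : 1;
}
\endcode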
\section TopicPreprocessorDirectivesPlugins Plugins
It is possible to add new methods to many fundamental classes in %Eigen by writing a plugin. As explained in

doc/TopicCMakeGuide.dox (new file, 52 lines)
View File

@@ -0,0 +1,52 @@
namespace Eigen {
/**
\page TopicCMakeGuide Using %Eigen in CMake Projects
%Eigen provides native CMake support which allows the library to be easily
used in CMake projects.
\note %CMake 3.0 (or later) is required to enable this functionality.
%Eigen exports a CMake target called `Eigen3::Eigen` which can be imported
using the `find_package` CMake command and used by calling
`target_link_libraries` as in the following example:
\code{.cmake}
cmake_minimum_required (VERSION 3.0)
project (myproject)
find_package (Eigen3 3.3 REQUIRED NO_MODULE)
add_executable (example example.cpp)
target_link_libraries (example Eigen3::Eigen)
\endcode
The above code snippet must be placed in a file called `CMakeLists.txt` alongside
`example.cpp`. After running
\code{.sh}
$ cmake path-to-example-directory
\endcode
CMake will produce project files that generate an executable called `example`
which requires at least version 3.3 of %Eigen. Here, `path-to-example-directory`
is the path to the directory that contains both `CMakeLists.txt` and
`example.cpp`.
If you have multiple installed versions of %Eigen, you can pick your favorite one by setting the \c Eigen3_DIR CMake variable to the path containing the respective \c Eigen3*.cmake files. For instance:
\code
cmake path-to-example-directory -DEigen3_DIR=$HOME/mypackages/share/eigen3/cmake/
\endcode
If the `REQUIRED` option is omitted when locating %Eigen using
`find_package`, one can check whether the package was found as follows:
\code{.cmake}
find_package (Eigen3 3.3 NO_MODULE)
if (TARGET Eigen3::Eigen)
# Use the imported target
endif (TARGET Eigen3::Eigen)
\endcode
*/
}

View File

@@ -0,0 +1,52 @@
#include <Eigen/Core>
#include <iostream>
using namespace Eigen;
// [circulant_func]
template<class ArgType>
class circulant_functor {
const ArgType &m_vec;
public:
circulant_functor(const ArgType& arg) : m_vec(arg) {}
const typename ArgType::Scalar& operator() (Index row, Index col) const {
Index index = row - col;
if (index < 0) index += m_vec.size();
return m_vec(index);
}
};
// [circulant_func]
// [square]
template<class ArgType>
struct circulant_helper {
typedef Matrix<typename ArgType::Scalar,
ArgType::SizeAtCompileTime,
ArgType::SizeAtCompileTime,
ColMajor,
ArgType::MaxSizeAtCompileTime,
ArgType::MaxSizeAtCompileTime> MatrixType;
};
// [square]
// [makeCirculant]
template <class ArgType>
CwiseNullaryOp<circulant_functor<ArgType>, typename circulant_helper<ArgType>::MatrixType>
makeCirculant(const Eigen::MatrixBase<ArgType>& arg)
{
typedef typename circulant_helper<ArgType>::MatrixType MatrixType;
return MatrixType::NullaryExpr(arg.size(), arg.size(), circulant_functor<ArgType>(arg.derived()));
}
// [makeCirculant]
// [main]
int main()
{
Eigen::VectorXd vec(4);
vec << 1, 2, 4, 8;
Eigen::MatrixXd mat;
mat = makeCirculant(vec);
std::cout << mat << std::endl;
}
// [main]
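For reference, running this program should print the 4-by-4 circulant matrix built from the vector (1, 2, 4, 8), each column being the previous one shifted down by one:
1 8 4 2
2 1 8 4
4 2 1 8
8 4 2 1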

View File

@@ -0,0 +1,8 @@
cout << "Even spacing inputs:" << endl;
cout << VectorXi::LinSpaced(8,1,4).transpose() << endl;
cout << VectorXi::LinSpaced(8,1,8).transpose() << endl;
cout << VectorXi::LinSpaced(8,1,15).transpose() << endl;
cout << "Uneven spacing inputs:" << endl;
cout << VectorXi::LinSpaced(8,1,7).transpose() << endl;
cout << VectorXi::LinSpaced(8,1,9).transpose() << endl;
cout << VectorXi::LinSpaced(8,1,16).transpose() << endl;

View File

@@ -355,7 +355,7 @@ if(CUDA_FOUND)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
endif()
if(EIGEN_TEST_CUDA_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30")

View File

@@ -134,6 +134,12 @@ template<typename MatrixType> void comparisons(const MatrixType& m)
// count
VERIFY(((m1.array().abs()+1)>RealScalar(0.1)).count() == rows*cols);
// and/or
VERIFY( ((m1.array()<RealScalar(0)).matrix() && (m1.array()>RealScalar(0)).matrix()).count() == 0);
VERIFY( ((m1.array()<RealScalar(0)).matrix() || (m1.array()>=RealScalar(0)).matrix()).count() == rows*cols);
RealScalar a = m1.cwiseAbs().mean();
VERIFY( ((m1.array()<-a).matrix() || (m1.array()>a).matrix()).count() == (m1.cwiseAbs().array()>a).count());
typedef Matrix<typename MatrixType::Index, Dynamic, 1> VectorOfIndices;
// TODO allows colwise/rowwise for array

View File

@@ -111,6 +111,8 @@ template<typename Scalar,int Size> void homogeneous(void)
VERIFY_IS_APPROX( (v0.transpose().homogeneous() .lazyProduct( t2 )).hnormalized(), (v0.transpose().homogeneous()*t2).hnormalized() );
VERIFY_IS_APPROX( (pts.transpose().rowwise().homogeneous() .lazyProduct( t2 )).rowwise().hnormalized(), (pts1.transpose()*t2).rowwise().hnormalized() );
VERIFY_IS_APPROX( (t2.template triangularView<Lower>() * v0.homogeneous()).eval(), (t2.template triangularView<Lower>()*hv0) );
}
void test_geo_homogeneous()

Some files were not shown because too many files have changed in this diff Show More