From 4cc2c73e6ac9bf0a5d7ad59ad43627353c380b02 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 17 Sep 2016 12:52:27 +0200 Subject: [PATCH 01/19] Fix alignement of statically allocated temporaries in gemv. --- Eigen/src/Core/GeneralProduct.h | 18 +++++++++--------- test/product_small.cpp | 3 ++- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index bff322b3c..a8c83f168 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -159,20 +159,20 @@ struct gemv_static_vector_if template struct gemv_static_vector_if { - #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 - internal::plain_array m_data; - EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } - #else - // Some architectures cannot align on the stack, - // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. enum { ForceAlignment = internal::packet_traits::Vectorizable, PacketSize = internal::packet_traits::size }; - internal::plain_array m_data; + #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 + internal::plain_array m_data; + EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } + #else + // Some architectures cannot align on the stack, + // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. + internal::plain_array m_data; EIGEN_STRONG_INLINE Scalar* data() { return ForceAlignment - ? reinterpret_cast((internal::UIntPtr(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) + ? reinterpret_cast((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) : m_data.array; } #endif @@ -207,7 +207,7 @@ template<> struct gemv_dense_selector typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef Map, Aligned> MappedDest; + typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); diff --git a/test/product_small.cpp b/test/product_small.cpp index 3e8dab01e..0db50b949 100644 --- a/test/product_small.cpp +++ b/test/product_small.cpp @@ -213,7 +213,8 @@ void test_product_small() { for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( product(Matrix()) ); - CALL_SUBTEST_2( product(Matrix()) ); + CALL_SUBTEST_2( product(Matrix()) ); + CALL_SUBTEST_8( product(Matrix()) ); CALL_SUBTEST_3( product(Matrix3d()) ); CALL_SUBTEST_4( product(Matrix4d()) ); CALL_SUBTEST_5( product(Matrix4f()) ); From de05a18fe0d9903b77182df41155794248fa0b09 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 17 Sep 2016 14:13:48 +0200 Subject: [PATCH 02/19] fix compilation with boost::multiprec --- test/svd_fill.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/svd_fill.h b/test/svd_fill.h index a1f752c66..5c2c61f8e 100644 --- a/test/svd_fill.h +++ b/test/svd_fill.h @@ -8,7 +8,7 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. template -Array four_denorms(); +Array four_denorms() { return four_denorms().cast(); } template<> Array4f four_denorms() { return Array4f(5.60844e-39f, -5.60844e-39f, 4.94e-44f, -4.94e-44f); } From bf03820339f45f9483ddbe5bb927ca3078fda19b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 17 Sep 2016 14:14:01 +0200 Subject: [PATCH 03/19] Silent warning. --- test/fastmath.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/fastmath.cpp b/test/fastmath.cpp index 438e6b2e5..cc5db0746 100644 --- a/test/fastmath.cpp +++ b/test/fastmath.cpp @@ -49,7 +49,8 @@ void check_inf_nan(bool dryrun) { VERIFY( !m.allFinite() ); VERIFY( m.hasNaN() ); } - m(4) /= T(0.0); + T hidden_zero = (std::numeric_limits::min)()*(std::numeric_limits::min)(); + m(4) /= hidden_zero; if(dryrun) { std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n"; From 5dcc6d301a822db0fb95d5b9e010c0eb39d85bbc Mon Sep 17 00:00:00 2001 From: Hongkai Dai Date: Mon, 19 Sep 2016 10:30:30 -0700 Subject: [PATCH 04/19] remove ternary operator in euler angles --- Eigen/src/Geometry/EulerAngles.h | 14 ++++++++++++-- unsupported/Eigen/src/EulerAngles/EulerSystem.h | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Geometry/EulerAngles.h b/Eigen/src/Geometry/EulerAngles.h index b875b7a13..4865e58aa 100644 --- a/Eigen/src/Geometry/EulerAngles.h +++ b/Eigen/src/Geometry/EulerAngles.h @@ -55,7 +55,12 @@ MatrixBase::eulerAngles(Index a0, Index a1, Index a2) const res[0] = atan2(coeff(j,i), coeff(k,i)); if((odd && res[0]Scalar(0))) { - res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + if(res[0] > Scalar(0)) { + res[0] -= Scalar(EIGEN_PI); + } + else { + res[0] += Scalar(EIGEN_PI); + } Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm(); res[1] = -atan2(s2, coeff(i,i)); } @@ -84,7 +89,12 @@ MatrixBase::eulerAngles(Index a0, Index a1, Index a2) const res[0] = atan2(coeff(j,k), coeff(k,k)); Scalar c2 = Vector2(coeff(i,i), coeff(i,j)).norm(); if((odd && res[0]Scalar(0))) { - res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + if(res[0] > Scalar(0)) { + res[0] -= Scalar(EIGEN_PI); + } + else { + res[0] += Scalar(EIGEN_PI); + } res[1] = atan2(-coeff(i,k), -c2); } else diff --git a/unsupported/Eigen/src/EulerAngles/EulerSystem.h b/unsupported/Eigen/src/EulerAngles/EulerSystem.h index 82243e643..98f9f647d 100644 --- a/unsupported/Eigen/src/EulerAngles/EulerSystem.h +++ b/unsupported/Eigen/src/EulerAngles/EulerSystem.h @@ -189,7 +189,12 @@ namespace Eigen res[0] = atan2(mat(J,K), mat(K,K)); Scalar c2 = Vector2(mat(I,I), mat(I,J)).norm(); if((IsOdd && res[0]Scalar(0))) { - res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + if(res[0] > Scalar(0)) { + res[0] -= Scalar(EIGEN_PI); + } + else { + res[0] += Scalar(EIGEN_PI); + } res[1] = atan2(-mat(I,K), -c2); } else @@ -212,7 +217,12 @@ namespace Eigen res[0] = atan2(mat(J,I), mat(K,I)); if((IsOdd && res[0]Scalar(0))) { - res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + if(res[0] > Scalar(0)) { + res[0] -= Scalar(EIGEN_PI); + } + else { + res[0] += Scalar(EIGEN_PI); + } Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm(); res[1] = -atan2(s2, mat(I,I)); } From c3ca9b1e763ca74f953ee1cff43f2e0ba9d2edf9 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 19 Sep 2016 11:33:39 -0700 Subject: [PATCH 05/19] Deleted some unecessary and confusing EIGEN_DEVICE_FUNC --- .../Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 93 ++++--------------- 1 file changed, 18 insertions(+), 75 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 1468caa23..28c6f7626 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -168,39 +168,20 @@ struct GpuDevice { return stream_->stream(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { return stream_->allocate(num_bytes); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return NULL; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { stream_->deallocate(buffer); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* scratchpad() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void* scratchpad() const { return stream_->scratchpad(); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return NULL; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned int* semaphore() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE unsigned int* semaphore() const { return stream_->semaphore(); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return NULL; -#endif } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { @@ -210,30 +191,22 @@ struct GpuDevice { EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else - eigen_assert(false && "The default device should be used instead to generate kernel code"); + eigen_assert(false && "The default device should be used instead to generate kernel code"); #endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { @@ -242,21 +215,21 @@ struct GpuDevice { EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else - eigen_assert(false && "The default device should be used instead to generate kernel code"); + eigen_assert(false && "The default device should be used instead to generate kernel code"); #endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { + EIGEN_STRONG_INLINE size_t numThreads() const { // FIXME return 32; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { // FIXME return 48*1024; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { // We won't try to take advantage of the l2 cache for the time being, and // there is no l3 cache on cuda devices. return firstLevelCacheSize(); @@ -276,56 +249,26 @@ struct GpuDevice { #endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { return stream_->deviceProperties().multiProcessorCount; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { return stream_->deviceProperties().maxThreadsPerBlock; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { return stream_->deviceProperties().maxThreadsPerMultiProcessor; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int sharedMemPerBlock() const { return stream_->deviceProperties().sharedMemPerBlock; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int majorDeviceVersion() const { return stream_->deviceProperties().major; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int minorDeviceVersion() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int minorDeviceVersion() const { return stream_->deviceProperties().minor; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const { + EIGEN_STRONG_INLINE int maxBlocks() const { return max_blocks_; } From 59e9edfbf1e1a58d1e2401ec2f05b6fdd19fd87c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 19 Sep 2016 14:13:20 -0700 Subject: [PATCH 06/19] Removed EIGEN_DEVICE_FUNC qualifers for the lu(), fullPivLu(), partialPivLu(), and inverse() functions since they aren't ready to run on GPU --- Eigen/src/Core/MatrixBase.h | 4 ---- Eigen/src/LU/FullPivLU.h | 2 +- Eigen/src/LU/InverseImpl.h | 2 +- Eigen/src/LU/PartialPivLU.h | 4 ++-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 19a7483ba..d56df8249 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -330,15 +330,11 @@ template class MatrixBase /////////// LU module /////////// - EIGEN_DEVICE_FUNC inline const FullPivLU fullPivLu() const; - EIGEN_DEVICE_FUNC inline const PartialPivLU partialPivLu() const; - EIGEN_DEVICE_FUNC inline const PartialPivLU lu() const; - EIGEN_DEVICE_FUNC inline const Inverse inverse() const; template diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index ebcd5c208..03b6af706 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -879,7 +879,7 @@ struct Assignment >, internal::assign_ * * \sa class FullPivLU */ -template EIGEN_DEVICE_FUNC +template inline const FullPivLU::PlainObject> MatrixBase::fullPivLu() const { diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h index 147f9496c..3134632e1 100644 --- a/Eigen/src/LU/InverseImpl.h +++ b/Eigen/src/LU/InverseImpl.h @@ -327,7 +327,7 @@ struct Assignment, internal::assign_op EIGEN_DEVICE_FUNC +template inline const Inverse MatrixBase::inverse() const { EIGEN_STATIC_ASSERT(!NumTraits::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 13394fffa..d43961887 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -584,7 +584,7 @@ struct Assignment >, internal::assi * * \sa class PartialPivLU */ -template EIGEN_DEVICE_FUNC +template inline const PartialPivLU::PlainObject> MatrixBase::partialPivLu() const { @@ -599,7 +599,7 @@ MatrixBase::partialPivLu() const * * \sa class PartialPivLU */ -template EIGEN_DEVICE_FUNC +template inline const PartialPivLU::PlainObject> MatrixBase::lu() const { From b2c6dc48d9189eb96f878aa6028aec245eadde85 Mon Sep 17 00:00:00 2001 From: RJ Ryan Date: Tue, 20 Sep 2016 07:18:20 -0700 Subject: [PATCH 07/19] Add CUDA-specific std::complex specializations for scalar_sum_op, scalar_difference_op, scalar_product_op, and scalar_quotient_op. --- Eigen/Core | 1 + Eigen/src/Core/arch/CUDA/Complex.h | 80 +++++++++++++++ unsupported/test/CMakeLists.txt | 1 + .../cxx11_tensor_complex_cwise_ops_cuda.cu | 97 +++++++++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 Eigen/src/Core/arch/CUDA/Complex.h create mode 100644 unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu diff --git a/Eigen/Core b/Eigen/Core index 3ffd6220b..bf2479585 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -359,6 +359,7 @@ using std::ptrdiff_t; #include "src/Core/arch/ZVector/Complex.h" #endif +#include "src/Core/arch/CUDA/Complex.h" // Half float support #include "src/Core/arch/CUDA/Half.h" #include "src/Core/arch/CUDA/PacketMathHalf.h" diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h new file mode 100644 index 000000000..aa511a4b2 --- /dev/null +++ b/Eigen/src/Core/arch/CUDA/Complex.h @@ -0,0 +1,80 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMPLEX_CUDA_H +#define EIGEN_COMPLEX_CUDA_H + +// clang-format off + +namespace Eigen { + +namespace internal { + +#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) + +// Many std::complex methods such as operator+, operator-, operator* and +// operator/ are not constexpr. Due to this, clang does not treat them as device +// functions and thus Eigen functors making use of these operators fail to +// compile. Here, we manually specialize these functors for complex types when +// building for CUDA to avoid non-constexpr methods. + +template struct scalar_sum_op> { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex operator() (const std::complex& a, const std::complex& b) const { + return std::complex(numext::real(a) + numext::real(b), + numext::imag(a) + numext::imag(b)); + } +}; + +template struct scalar_difference_op> { + EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex operator() (const std::complex& a, const std::complex& b) const { + return std::complex(numext::real(a) - numext::real(b), + numext::imag(a) - numext::imag(b)); + } +}; + +template struct scalar_product_op, std::complex> { + enum { + Vectorizable = packet_traits>::HasMul + }; + EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex operator() (const std::complex& a, const std::complex& b) const { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + return std::complex(a_real * b_real - a_imag * b_imag, + a_real * b_imag + a_imag * b_real); + } +}; + +template struct scalar_quotient_op, std::complex> { + enum { + Vectorizable = packet_traits>::HasDiv + }; + EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex operator() (const std::complex& a, const std::complex& b) const { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + const T norm = T(1) / (b_real * b_real + b_imag * b_imag); + return std::complex((a_real * b_real + a_imag * b_imag) * norm, + (a_imag * b_real - a_real * b_imag) * norm); + } +}; + +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COMPLEX_CUDA_H diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 714046809..9eac6ec73 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -226,6 +226,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") ei_add_test(cxx11_tensor_complex_cuda) + ei_add_test(cxx11_tensor_complex_cwise_ops_cuda) ei_add_test(cxx11_tensor_reduction_cuda) ei_add_test(cxx11_tensor_argmax_cuda) ei_add_test(cxx11_tensor_cast_float16_cuda) diff --git a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu new file mode 100644 index 000000000..54c17ca28 --- /dev/null +++ b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu @@ -0,0 +1,97 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include +#endif +#include "main.h" +#include + +using Eigen::Tensor; + +template +void test_cuda_complex_cwise_ops() { + const int kNumItems = 2; + std::size_t complex_bytes = kNumItems * sizeof(std::complex); + + std::complex* d_in1; + std::complex* d_in2; + std::complex* d_out; + cudaMalloc((void**)(&d_in1), complex_bytes); + cudaMalloc((void**)(&d_in2), complex_bytes); + cudaMalloc((void**)(&d_out), complex_bytes); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap, 1, 0, int>, Eigen::Aligned> gpu_in1( + d_in1, kNumItems); + Eigen::TensorMap, 1, 0, int>, Eigen::Aligned> gpu_in2( + d_in2, kNumItems); + Eigen::TensorMap, 1, 0, int>, Eigen::Aligned> gpu_out( + d_out, kNumItems); + + const std::complex a(3.14f, 2.7f); + const std::complex b(-10.6f, 1.4f); + + gpu_in1.device(gpu_device) = gpu_in1.constant(a); + gpu_in2.device(gpu_device) = gpu_in2.constant(b); + + enum CwiseOp { + Add, + Sub, + Mul, + Div + }; + + Tensor, 1, 0, int> actual(2); + for (CwiseOp op : {Add, Sub, Mul, Div}) { + std::complex expected; + switch (op) { + case Add: + gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; + expected = a + b; + break; + case Sub: + gpu_out.device(gpu_device) = gpu_in1 - gpu_in2; + expected = a - b; + break; + case Mul: + gpu_out.device(gpu_device) = gpu_in1 * gpu_in2; + expected = a * b; + break; + case Div: + gpu_out.device(gpu_device) = gpu_in1 / gpu_in2; + expected = a / b; + break; + } + assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < kNumItems; ++i) { + VERIFY_IS_APPROX(actual(i), expected); + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); +} + + +void test_cxx11_tensor_complex_cwise_ops() +{ + CALL_SUBTEST(test_cuda_complex_cwise_ops()); + CALL_SUBTEST(test_cuda_complex_cwise_ops()); +} From 608b1acd6d43bff23b3b136c546d88c939e4d37d Mon Sep 17 00:00:00 2001 From: RJ Ryan Date: Tue, 20 Sep 2016 07:49:05 -0700 Subject: [PATCH 08/19] Don't use c++11 features and fix include. --- .../test/cxx11_tensor_complex_cwise_ops_cuda.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu index 54c17ca28..2baf5eaad 100644 --- a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu +++ b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu @@ -15,7 +15,7 @@ #include #endif #include "main.h" -#include +#include using Eigen::Tensor; @@ -48,16 +48,16 @@ void test_cuda_complex_cwise_ops() { gpu_in2.device(gpu_device) = gpu_in2.constant(b); enum CwiseOp { - Add, + Add = 0, Sub, Mul, Div }; - Tensor, 1, 0, int> actual(2); - for (CwiseOp op : {Add, Sub, Mul, Div}) { + Tensor, 1, 0, int> actual(kNumItems); + for (int op = Add; op <= Div; op++) { std::complex expected; - switch (op) { + switch (static_cast(op)) { case Add: gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; expected = a + b; From 26f99075425cd9bf1db31d6d76a5b08570162bd2 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 20 Sep 2016 12:58:03 -0700 Subject: [PATCH 09/19] Added missing typedefs --- Eigen/src/Core/arch/CUDA/Complex.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h index aa511a4b2..f133b2db9 100644 --- a/Eigen/src/Core/arch/CUDA/Complex.h +++ b/Eigen/src/Core/arch/CUDA/Complex.h @@ -25,6 +25,8 @@ namespace internal { // building for CUDA to avoid non-constexpr methods. template struct scalar_sum_op> { + typedef typename std::complex result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex operator() (const std::complex& a, const std::complex& b) const { return std::complex(numext::real(a) + numext::real(b), @@ -33,6 +35,8 @@ template struct scalar_sum_op> { }; template struct scalar_difference_op> { + typedef typename std::complex result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex operator() (const std::complex& a, const std::complex& b) const { return std::complex(numext::real(a) - numext::real(b), @@ -44,6 +48,8 @@ template struct scalar_product_op, std::complex> enum { Vectorizable = packet_traits>::HasMul }; + typedef typename std::complex result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex operator() (const std::complex& a, const std::complex& b) const { const T a_real = numext::real(a); @@ -59,6 +65,8 @@ template struct scalar_quotient_op, std::complex> enum { Vectorizable = packet_traits>::HasDiv }; + typedef typename std::complex result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex operator() (const std::complex& a, const std::complex& b) const { const T a_real = numext::real(a); From 5269d11935b4169647d0a410c026728fa6f3708f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 21 Sep 2016 17:08:51 +0200 Subject: [PATCH 10/19] Fix compilation if ICC. --- test/svd_fill.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/svd_fill.h b/test/svd_fill.h index 5c2c61f8e..3877c0c7e 100644 --- a/test/svd_fill.h +++ b/test/svd_fill.h @@ -8,12 +8,14 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. template -Array four_denorms() { return four_denorms().cast(); } +Array four_denorms(); template<> Array4f four_denorms() { return Array4f(5.60844e-39f, -5.60844e-39f, 4.94e-44f, -4.94e-44f); } template<> Array4d four_denorms() { return Array4d(5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324); } +template +Array four_denorms() { return four_denorms().cast(); } template void svd_fill_random(MatrixType &m, int Option = 0) From ac5377e16186c35ae1609245838825c07d0aa79f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 21 Sep 2016 17:26:04 +0200 Subject: [PATCH 11/19] Improve cost estimation of complex division --- Eigen/src/Core/util/XprHelper.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index fa60008ef..088a65240 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -671,6 +671,14 @@ struct scalar_div_cost { enum { value = 8*NumTraits::MulCost }; }; +template +struct scalar_div_cost, Vectorized> { + enum { value = 2*scalar_div_cost::value + + 6*NumTraits::MulCost + + 3*NumTraits::AddCost + }; +}; + template struct scalar_div_cost::type> { enum { value = 24 }; }; From 9fa2c8650e06c964347311aaa571c06a07dca4bd Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 21 Sep 2016 17:34:24 +0200 Subject: [PATCH 12/19] Fix alignement of statically allocated temporaries in symv, and trmv. --- Eigen/src/Core/products/SelfadjointMatrixVector.h | 2 +- Eigen/src/Core/products/TriangularMatrixVector.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index d8d30267e..d97f8caa7 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -179,7 +179,7 @@ struct selfadjoint_product_impl { typedef typename Dest::Scalar ResScalar; typedef typename Rhs::Scalar RhsScalar; - typedef Map, Aligned> MappedDest; + typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols()); diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index c11a983c7..4b292e74d 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -216,7 +216,7 @@ template struct trmv_selector typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef Map, Aligned> MappedDest; + typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(rhs); From 1fc3a21ed0ca27aef0a900b8b49e3dcb086e5157 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 21 Sep 2016 20:09:07 +0200 Subject: [PATCH 13/19] Disable a failure test if extended double precision is in use (x87) --- test/cholesky.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/cholesky.cpp b/test/cholesky.cpp index 9a1f3792c..e4af80fe2 100644 --- a/test/cholesky.cpp +++ b/test/cholesky.cpp @@ -417,6 +417,7 @@ void cholesky_faillure_cases() VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); VERIFY(ldlt.info()==NumericalIssue); } +#if (!EIGEN_ARCH_i386) || EIGEN_VECTORIZE_SSE2 { mat.resize(3,3); mat << -1, -3, 3, @@ -426,6 +427,7 @@ void cholesky_faillure_cases() VERIFY(ldlt.info()==NumericalIssue); VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); } +#endif { mat.resize(3,3); mat << 1, 2, 3, From aecc51a3e8f15ab28fb5fbfda04e027213a4c732 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 21 Sep 2016 21:53:00 +0200 Subject: [PATCH 14/19] fix typo --- test/cholesky.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cholesky.cpp b/test/cholesky.cpp index e4af80fe2..8ad5ac639 100644 --- a/test/cholesky.cpp +++ b/test/cholesky.cpp @@ -417,7 +417,7 @@ void cholesky_faillure_cases() VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); VERIFY(ldlt.info()==NumericalIssue); } -#if (!EIGEN_ARCH_i386) || EIGEN_VECTORIZE_SSE2 +#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE_SSE2) { mat.resize(3,3); mat << -1, -3, 3, From 4b377715d7e62ba898c0bbd25672523b14214ceb Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Thu, 22 Sep 2016 00:10:47 +0200 Subject: [PATCH 15/19] Do not manually add absolute path to boost-library. Also set C++ standard for blaze to C++14 --- bench/btl/libs/blaze/CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bench/btl/libs/blaze/CMakeLists.txt b/bench/btl/libs/blaze/CMakeLists.txt index f8b1b2ec3..e99a0855c 100644 --- a/bench/btl/libs/blaze/CMakeLists.txt +++ b/bench/btl/libs/blaze/CMakeLists.txt @@ -1,10 +1,13 @@ find_package(BLAZE) -find_package(Boost) +find_package(Boost COMPONENTS system) if (BLAZE_FOUND AND Boost_FOUND) include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) btl_add_bench(btl_blaze main.cpp) + # Note: The newest blaze version requires C++14. + # Ideally, we should set this depending on the version of Blaze we found + set_property(TARGET btl_blaze PROPERTY CXX_STANDARD 14) if(BUILD_btl_blaze) - target_link_libraries(btl_blaze ${Boost_LIBRARIES} ${Boost_system_LIBRARY} /opt/local/lib/libboost_system-mt.a ) + target_link_libraries(btl_blaze ${Boost_LIBRARIES}) endif() endif () From 66cbabafed7957a7f6c03b34df854149233de596 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 22 Sep 2016 11:18:52 +0200 Subject: [PATCH 16/19] Add a note regarding gcc bug #72867 --- Eigen/src/Core/MathFunctionsImpl.h | 8 ++++++-- test/packetmath.cpp | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index 0c77ee003..3c9ef22fa 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -29,8 +29,12 @@ T generic_fast_tanh_float(const T& a_x) // this range is +/-1.0f in single-precision. const T plus_9 = pset1(9.f); const T minus_9 = pset1(-9.f); - const T x = pmax(minus_9, pmin(plus_9, a_x)); - + // NOTE GCC prior to 6.3 might improperly optimize this max/min + // step such that if a_x is nan, x will be either 9 or -9, + // and tanh will return 1 or -1 instead of nan. + // This is supposed to be fixed in gcc6.3, + // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 + const T x = pmax(minus_9,pmin(plus_9,a_x)); // The monomial coefficients of the numerator polynomial (odd). const T alpha_1 = pset1(4.89352455891786e-03f); const T alpha_3 = pset1(6.37261928875436e-04f); diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 77514d8a0..1394d9f2b 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -365,6 +365,7 @@ template void packetmath_real() } if (PacketTraits::HasTanh) { + // NOTE this test migh fail with GCC prior to 6.3, see MathFunctionsImpl.h for details. data1[0] = std::numeric_limits::quiet_NaN(); packet_helper::HasTanh,Packet> h; h.store(data2, internal::ptanh(h.load(data1))); From 8bde7da0862ca0fcd1b643170b69eba245a1750c Mon Sep 17 00:00:00 2001 From: Felix Gruber Date: Thu, 22 Sep 2016 14:50:07 +0200 Subject: [PATCH 17/19] fix documentation of LinSpaced The index of the highest value in a LinSpace is size-1. --- Eigen/src/Core/CwiseNullaryOp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index e3f20894d..25c3ef3d7 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -220,7 +220,7 @@ DenseBase::Constant(const Scalar& value) * * The function generates 'size' equally spaced values in the closed interval [low,high]. * This particular version of LinSpaced() uses sequential access, i.e. vector access is - * assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization + * assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization * and yields faster code than the random access version. * * When size is set to 1, a vector of length 1 containing 'high' is returned. @@ -389,7 +389,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, con /** * \brief Sets a linearly spaced vector. * - * The function fill *this with equally spaced values in the closed interval [low,high]. + * The function fills *this with equally spaced values in the closed interval [low,high]. * When size is set to 1, a vector of length 1 containing 'high' is returned. * * \only_for_vectors From ca3746c6f8f788e4cc6ad9c88d1857c85be19a3b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 22 Sep 2016 22:07:13 +0200 Subject: [PATCH 18/19] Bypass identity reflectors. --- Eigen/src/Householder/Householder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Householder/Householder.h b/Eigen/src/Householder/Householder.h index 31f804a99..e4d81629c 100644 --- a/Eigen/src/Householder/Householder.h +++ b/Eigen/src/Householder/Householder.h @@ -119,7 +119,7 @@ void MatrixBase::applyHouseholderOnTheLeft( { *this *= Scalar(1)-tau; } - else + else if(tau!=Scalar(0)) { Map::type> tmp(workspace,cols()); Block bottom(derived(), 1, 0, rows()-1, cols()); @@ -156,7 +156,7 @@ void MatrixBase::applyHouseholderOnTheRight( { *this *= Scalar(1)-tau; } - else + else if(tau!=Scalar(0)) { Map::type> tmp(workspace,rows()); Block right(derived(), 0, 1, rows(), cols()-1); From 9bcdc8b75669d2a2ec3a7e1fe6ae96854a0bc2e4 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 22 Sep 2016 22:27:54 +0200 Subject: [PATCH 19/19] Add a nullary-functor example performing index-based sub-matrices. --- doc/CustomizingEigen_NullaryExpr.dox | 27 ++++++++++++ doc/examples/CMakeLists.txt | 5 +++ doc/examples/nullary_indexing.cpp | 66 ++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 doc/examples/nullary_indexing.cpp diff --git a/doc/CustomizingEigen_NullaryExpr.dox b/doc/CustomizingEigen_NullaryExpr.dox index d70f81065..37c8dcd89 100644 --- a/doc/CustomizingEigen_NullaryExpr.dox +++ b/doc/CustomizingEigen_NullaryExpr.dox @@ -53,6 +53,33 @@ showing that the program works as expected: This implementation of \c makeCirculant is much simpler than \ref TopicNewExpressionType "defining a new expression" from scratch. + +\section NullaryExpr_Indexing Example 2: indexing rows and columns + +The goal here is to mimic MatLab's ability to index a matrix through two vectors of indices referencing the rows and columns to be picked respectively, like this: + +\snippet nullary_indexing.out main1 + +To this end, let us first write a nullary-functor storing references to the input matrix and to the two arrays of indices, and implementing the required \c operator()(i,j): + +\snippet nullary_indexing.cpp functor + +Then, let's create an \c indexing(A,rows,cols) function creating the nullary expression: + +\snippet nullary_indexing.cpp function + +Finally, here is an example of how this function can be used: + +\snippet nullary_indexing.cpp main1 + +This straightforward implementation is already quite powerful as the row or column index arrays can also be expressions to perform offsetting, modulo, striding, reverse, etc. + +\snippet nullary_indexing.cpp main2 + +and the output is: + +\snippet nullary_indexing.out main2 + */ } diff --git a/doc/examples/CMakeLists.txt b/doc/examples/CMakeLists.txt index 08cf8efd7..f7a19055f 100644 --- a/doc/examples/CMakeLists.txt +++ b/doc/examples/CMakeLists.txt @@ -14,3 +14,8 @@ foreach(example_src ${examples_SRCS}) ) add_dependencies(all_examples ${example}) endforeach(example_src) + +check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11) +if(EIGEN_COMPILER_SUPPORT_CPP11) +ei_add_target_property(nullary_indexing COMPILE_FLAGS "-std=c++11") +endif() \ No newline at end of file diff --git a/doc/examples/nullary_indexing.cpp b/doc/examples/nullary_indexing.cpp new file mode 100644 index 000000000..e27c3585a --- /dev/null +++ b/doc/examples/nullary_indexing.cpp @@ -0,0 +1,66 @@ +#include +#include + +using namespace Eigen; + +// [functor] +template +class indexing_functor { + const ArgType &m_arg; + const RowIndexType &m_rowIndices; + const ColIndexType &m_colIndices; +public: + typedef Matrix MatrixType; + + indexing_functor(const ArgType& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) + : m_arg(arg), m_rowIndices(row_indices), m_colIndices(col_indices) + {} + + const typename ArgType::Scalar& operator() (Index row, Index col) const { + return m_arg(m_rowIndices[row], m_colIndices[col]); + } +}; +// [functor] + +// [function] +template +CwiseNullaryOp, typename indexing_functor::MatrixType> +indexing(const Eigen::MatrixBase& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) +{ + typedef indexing_functor Func; + typedef typename Func::MatrixType MatrixType; + return MatrixType::NullaryExpr(row_indices.size(), col_indices.size(), Func(arg.derived(), row_indices, col_indices)); +} +// [function] + + +int main() +{ + std::cout << "[main1]\n"; + Eigen::MatrixXi A = Eigen::MatrixXi::Random(4,4); + Array3i ri(1,2,1); + ArrayXi ci(6); ci << 3,2,1,0,0,2; + Eigen::MatrixXi B = indexing(A, ri, ci); + std::cout << "A =" << std::endl; + std::cout << A << std::endl << std::endl; + std::cout << "A([" << ri.transpose() << "], [" << ci.transpose() << "]) =" << std::endl; + std::cout << B << std::endl; + std::cout << "[main1]\n"; + + std::cout << "[main2]\n"; + B = indexing(A, ri+1, ci); + std::cout << "A(ri+1,ci) =" << std::endl; + std::cout << B << std::endl << std::endl; +#if __cplusplus >= 201103L + B = indexing(A, ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)); + std::cout << "A(ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)) =" << std::endl; + std::cout << B << std::endl << std::endl; +#endif + std::cout << "[main2]\n"; +} +