diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index 8ef0fb3b5..d2b0fb282 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -277,6 +277,7 @@ class CholmodBase : public SparseSolverBase if(!x_cd) { this->m_info = NumericalIssue; + return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) dest = Matrix::Map(reinterpret_cast(x_cd->x),b.rows(),b.cols()); @@ -298,6 +299,7 @@ class CholmodBase : public SparseSolverBase if(!x_cs) { this->m_info = NumericalIssue; + return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) dest = viewAsEigen(*x_cs); diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 74e1174ae..967a07df5 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -287,6 +287,21 @@ template EIGEN_DEVICE_FUNC inline typename unpacket_traits EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) { return a; } +template +struct protate_impl +{ + static Packet run(const Packet& a) { return a; } +}; + +/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention, + * by the given offset, e.g. for offset == 1: + * (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1]) + */ +template EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a) +{ + EIGEN_STATIC_ASSERT(offset < unpacket_traits::size, ROTATION_BY_ILLEGAL_OFFSET); + return offset ? protate_impl::run(a) : a; +} /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ template EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) diff --git a/Eigen/src/Core/arch/CMakeLists.txt b/Eigen/src/Core/arch/CMakeLists.txt index 0db8c558d..42b0b486e 100644 --- a/Eigen/src/Core/arch/CMakeLists.txt +++ b/Eigen/src/Core/arch/CMakeLists.txt @@ -1,5 +1,9 @@ -ADD_SUBDIRECTORY(SSE) ADD_SUBDIRECTORY(AltiVec) -ADD_SUBDIRECTORY(NEON) ADD_SUBDIRECTORY(AVX) +ADD_SUBDIRECTORY(CUDA) ADD_SUBDIRECTORY(Default) +ADD_SUBDIRECTORY(NEON) +ADD_SUBDIRECTORY(SSE) + + + diff --git a/Eigen/src/Core/arch/CUDA/CMakeLists.txt b/Eigen/src/Core/arch/CUDA/CMakeLists.txt new file mode 100644 index 000000000..7ba28da7c --- /dev/null +++ b/Eigen/src/Core/arch/CUDA/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Core_arch_CUDA_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Core_arch_CUDA_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/CUDA COMPONENT Devel +) diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 8149aed7f..e9af45f22 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -309,6 +309,23 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { a_hi = vget_high_s32(a_r64); return vcombine_s32(a_hi, a_lo); } + +template +struct protate_impl +{ + static Packet4f run(const Packet4f& a) { + return vextq_f32(a, a, offset); + } +}; + +template +struct protate_impl +{ + static Packet4i run(const Packet4i& a) { + return vextq_s32(a, a, offset); + } +}; + template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); } @@ -625,6 +642,14 @@ template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { retu template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); } +template +struct protate_impl +{ + static Packet2d run(const Packet2d& a) { + return vextq_f64(a, a, offset); + } +}; + template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); } #if EIGEN_COMP_CLANG && defined(__apple_build_version__) diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 9ffba5b41..f86c0a39a 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -138,7 +138,6 @@ Packet4f pexp(const Packet4f& _x) #ifdef EIGEN_VECTORIZE_SSE4_1 fx = _mm_floor_ps(fx); #else - tmp = _mm_setzero_ps(); emm0 = _mm_cvttps_epi32(fx); tmp = _mm_cvtepi32_ps(emm0); /* if greater, substract 1 */ @@ -207,7 +206,6 @@ Packet2d pexp(const Packet2d& _x) #ifdef EIGEN_VECTORIZE_SSE4_1 fx = _mm_floor_pd(fx); #else - tmp = _mm_setzero_pd(); emm0 = _mm_cvttpd_epi32(fx); tmp = _mm_cvtepi32_pd(emm0); /* if greater, substract 1 */ diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index b5a0ba2bc..3653783fd 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -462,6 +462,29 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return _mm_shuffle_epi32(a,0x1B); } +template +struct protate_impl +{ + static Packet4f run(const Packet4f& a) { + return vec4f_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4); + } +}; + +template +struct protate_impl +{ + static Packet4i run(const Packet4i& a) { + return vec4i_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4); + } +}; + +template +struct protate_impl +{ + static Packet2d run(const Packet2d& a) { + return vec2d_swizzle1(a, offset, (offset + 1) % 2); + } +}; template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 15bf04d1f..ccd906540 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -79,23 +79,37 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff * - the number of scalars that fit into a packet (when vectorization is enabled). * * \sa setCpuCacheSizes */ -#define CEIL(a, b) ((a)+(b)-1)/(b) -template -void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads) +template +void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) { + typedef gebp_traits Traits; + +#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES + EIGEN_UNUSED_VARIABLE(num_threads); + enum { + kr = 16, + mr = Traits::mr, + nr = Traits::nr + }; + k = std::min(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K); + if (k > kr) k -= k % kr; + m = std::min(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M); + if (m > mr) m -= m % mr; + n = std::min(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N); + if (n > nr) n -= n % nr; + return; +#endif + // Explanations: - // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and - // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed - // per kc x nr vertical small panels where nr is the blocking size along the n dimension - // at the register level. For vectorization purpose, these small vertical panels are unpacked, - // e.g., each coefficient is replicated to fit a packet. This small vertical panel has to - // stay in L1 cache. + // Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and + // kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed + // per mr x kc horizontal small panels where mr is the blocking size along the m dimension + // at the register level. This small horizontal panel has to stay within L1 cache. std::ptrdiff_t l1, l2, l3; manage_caching_sizes(GetAction, &l1, &l2, &l3); if (num_threads > 1) { - typedef gebp_traits Traits; typedef typename Traits::ResScalar ResScalar; enum { kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)), @@ -108,32 +122,32 @@ void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_ nr = Traits::nr, nr_mask = (0xffffffff/nr)*nr }; - SizeType k_cache = (l1-ksub)/kdiv; + Index k_cache = (l1-ksub)/kdiv; if (k_cache < k) { k = k_cache & k_mask; - eigen_assert(k > 0); + eigen_internal_assert(k > 0); } - SizeType n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k); - SizeType n_per_thread = CEIL(n, num_threads); + Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k); + Index n_per_thread = numext::div_ceil(n, num_threads); if (n_cache <= n_per_thread) { // Don't exceed the capacity of the l2 cache. - eigen_assert(n_cache >= static_cast(nr)); + eigen_internal_assert(n_cache >= static_cast(nr)); n = n_cache & nr_mask; - eigen_assert(n > 0); + eigen_internal_assert(n > 0); } else { - n = (std::min)(n, (n_per_thread + nr - 1) & nr_mask); + n = (std::min)(n, (n_per_thread + nr - 1) & nr_mask); } if (l3 > l2) { // l3 is shared between all cores, so we'll give each thread its own chunk of l3. - SizeType m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads); - SizeType m_per_thread = CEIL(m, num_threads); - if(m_cache < m_per_thread && m_cache >= static_cast(mr)) { + Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads); + Index m_per_thread = numext::div_ceil(m, num_threads); + if(m_cache < m_per_thread && m_cache >= static_cast(mr)) { m = m_cache & mr_mask; - eigen_assert(m > 0); + eigen_internal_assert(m > 0); } else { - m = (std::min)(m, (m_per_thread + mr - 1) & mr_mask); + m = (std::min)(m, (m_per_thread + mr - 1) & mr_mask); } } } @@ -141,19 +155,19 @@ void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_ // In unit tests we do not want to use extra large matrices, // so we reduce the block size to check the blocking strategy is not flawed #ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS - k = std::min(k,sizeof(LhsScalar)<=4 ? 360 : 240); - n = std::min(n,3840/sizeof(RhsScalar)); - m = std::min(m,3840/sizeof(RhsScalar)); + k = std::min(k,sizeof(LhsScalar)<=4 ? 360 : 240); + n = std::min(n,3840/sizeof(RhsScalar)); + m = std::min(m,3840/sizeof(RhsScalar)); #else - k = std::min(k,24); - n = std::min(n,384/sizeof(RhsScalar)); - m = std::min(m,384/sizeof(RhsScalar)); + k = std::min(k,24); + n = std::min(n,384/sizeof(RhsScalar)); + m = std::min(m,384/sizeof(RhsScalar)); #endif } } -template -inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads) +template +inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) { computeProductBlockingSizes(k, m, n, num_threads); } @@ -758,7 +772,7 @@ void gebp_kernel(&blB[(0+4*K)*RhsProgress]); \ + } else { \ + EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers"); \ + B_0 = protate<1>(B_0); \ + } \ + } else { \ + EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N); \ + } \ + } while (false) +#else +#define EIGEN_GEBP_ONESTEP_LOADRHS(K,N) \ + EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N) +#endif + +#define EIGEN_GEBP_ONESTEP(K) \ do { \ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ @@ -814,34 +859,34 @@ void gebp_kernel resblock; \ + resblock.packet[0] = res0; \ + resblock.packet[1] = res1; \ + resblock.packet[2] = res2; \ + resblock.packet[3] = res3; \ + ptranspose(resblock); \ + resblock.packet[3] = protate<1>(resblock.packet[3]); \ + resblock.packet[2] = protate<2>(resblock.packet[2]); \ + resblock.packet[1] = protate<3>(resblock.packet[1]); \ + ptranspose(resblock); \ + res0 = resblock.packet[0]; \ + res1 = resblock.packet[1]; \ + res2 = resblock.packet[2]; \ + res3 = resblock.packet[3]; \ + } while (false) + + EIGEN_GEBP_UNROTATE_RESULT(C0, C1, C2, C3); + EIGEN_GEBP_UNROTATE_RESULT(C4, C5, C6, C7); + EIGEN_GEBP_UNROTATE_RESULT(C8, C9, C10, C11); + } +#endif + ResPacket R0, R1, R2; ResPacket alphav = pset1(alpha); @@ -1788,14 +1862,14 @@ EIGEN_DONT_INLINE void gemm_pack_rhs kernel; kernel.packet[0] = dm0.loadPacket(k); - kernel.packet[1] = dm1.loadPacket(k); - kernel.packet[2] = dm2.loadPacket(k); - kernel.packet[3] = dm3.loadPacket(k); + kernel.packet[1%PacketSize] = dm1.loadPacket(k); + kernel.packet[2%PacketSize] = dm2.loadPacket(k); + kernel.packet[3%PacketSize] = dm3.loadPacket(k); ptranspose(kernel); pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0])); - pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1])); - pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2])); - pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3])); + pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize])); + pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2%PacketSize])); + pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3%PacketSize])); count+=4*PacketSize; } } diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 44e44b986..c38c12c31 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -217,8 +217,9 @@ struct gemm_functor : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking) {} - void initParallelSession() const + void initParallelSession(Index num_threads) const { + m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads); m_blocking.allocateA(); } @@ -276,7 +277,7 @@ class level3_blocking }; template -class gemm_blocking_space +class gemm_blocking_space : public level3_blocking< typename conditional::type, typename conditional::type> @@ -299,7 +300,7 @@ class gemm_blocking_spacem_mc = ActualRows; this->m_nc = ActualCols; @@ -307,6 +308,9 @@ class gemm_blocking_spacem_blockA = m_staticA; this->m_blockB = m_staticB; } + + void initParallel(Index, Index, Index, Index) + {} inline void allocateA() {} inline void allocateB() {} @@ -331,7 +335,7 @@ class gemm_blocking_spacem_mc = Transpose ? cols : rows; this->m_nc = Transpose ? rows : cols; @@ -351,6 +355,19 @@ class gemm_blocking_spacem_mc * this->m_kc; m_sizeB = this->m_kc * this->m_nc; } + + void initParallel(Index rows, Index cols, Index depth, Index num_threads) + { + this->m_mc = Transpose ? cols : rows; + this->m_nc = Transpose ? rows : cols; + this->m_kc = depth; + + eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0); + Index m = this->m_mc; + computeProductBlockingSizes(this->m_kc, m, this->m_nc, num_threads); + m_sizeA = this->m_mc * this->m_kc; + m_sizeB = this->m_kc * this->m_nc; + } void allocateA() { diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index 2b90abf8f..91d37a123 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -120,25 +120,28 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos return func(0,rows, 0,cols); Eigen::initParallel(); - func.initParallelSession(); + func.initParallelSession(threads); if(transpose) std::swap(rows,cols); - - Index blockCols = (cols / threads) & ~Index(0x3); - Index blockRows = (rows / threads); - blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr; ei_declare_aligned_stack_constructed_variable(GemmParallelInfo,info,threads,0); - + #pragma omp parallel num_threads(threads) { Index i = omp_get_thread_num(); + // Note that the actual number of threads might be lower than the number of request ones. + Index actual_threads = omp_get_num_threads(); + + Index blockCols = (cols / actual_threads) & ~Index(0x3); + Index blockRows = (rows / actual_threads); + blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr; + Index r0 = i*blockRows; - Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows; + Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows; Index c0 = i*blockCols; - Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols; + Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols; info[i].lhs_start = r0; info[i].lhs_length = actualBlockRows; diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 3ec55fad2..9bfa45106 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -166,7 +166,7 @@ class BlasLinearMapper { return ploadt(m_data + i); } - EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { + EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const { pstoret(m_data + i, p); } diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index e607cdd12..aaea9f035 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -382,6 +382,11 @@ #define EIGEN_HAVE_RVALUE_REFERENCES #endif +// Does the compiler support result_of? +#if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)) +#define EIGEN_HAS_STD_RESULT_OF 1 +#endif + // Does the compiler support variadic templates? #if __cplusplus > 199711L #define EIGEN_HAS_VARIADIC_TEMPLATES 1 diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index f3bafd5af..674cd8f97 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -165,6 +165,7 @@ template struct result_of {}; struct has_none {int a[1];}; struct has_std_result_type {int a[2];}; struct has_tr1_result {int a[3];}; +struct has_cxx_eleven_result {int a[4];}; template struct unary_result_of_select {typedef ArgType type;}; @@ -175,13 +176,22 @@ struct unary_result_of_select {typed template struct unary_result_of_select {typedef typename Func::template result::type type;}; +#ifdef EIGEN_HAS_STD_RESULT_OF +template +struct unary_result_of_select {typedef typename std::result_of::type type;}; +#endif + template struct result_of { template - static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); template - static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); - static has_none testFunctor(...); + static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); +#ifdef EIGEN_HAS_STD_RESULT_OF + template + static has_cxx_eleven_result testFunctor(T const *, typename std::result_of::type const * = 0); +#endif + static has_none testFunctor(...); // note that the following indirection is needed for gcc-3.3 enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; @@ -199,13 +209,23 @@ template struct binary_result_of_select {typedef typename Func::template result::type type;}; +#ifdef EIGEN_HAS_STD_RESULT_OF +template +struct binary_result_of_select +{typedef typename std::result_of::type type;}; +#endif + template struct result_of { template - static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); template - static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); - static has_none testFunctor(...); + static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); +#ifdef EIGEN_HAS_STD_RESULT_OF + template + static has_cxx_eleven_result testFunctor(T const *, typename std::result_of::type const * = 0); +#endif + static has_none testFunctor(...); // note that the following indirection is needed for gcc-3.3 enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; @@ -284,6 +304,14 @@ template EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = template EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); } #endif +// Integer division with rounding up. +// T is assumed to be an integer type with a>=0, and b>0 +template +T div_ceil(const T &a, const T &b) +{ + return (a+b-1) / b; +} + } // end namespace numext } // end namespace Eigen diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 7538a0633..5e16b775b 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -93,7 +93,8 @@ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, - STORAGE_LAYOUT_DOES_NOT_MATCH + STORAGE_LAYOUT_DOES_NOT_MATCH, + ROTATION_BY_ILLEGAL_OFFSET }; }; diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 40dc1a2bd..acd82e926 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -292,7 +292,8 @@ const typename SparseMatrixBase::ConstInnerVectorReturnType SparseMatri * is col-major (resp. row-major). */ template -Block SparseMatrixBase::innerVectors(Index outerStart, Index outerSize) +typename SparseMatrixBase::InnerVectorsReturnType +SparseMatrixBase::innerVectors(Index outerStart, Index outerSize) { return Block(derived(), IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart, @@ -304,7 +305,8 @@ Block SparseMatrixBase::innerVectors(Inde * is col-major (resp. row-major). Read-only. */ template -const Block SparseMatrixBase::innerVectors(Index outerStart, Index outerSize) const +const typename SparseMatrixBase::ConstInnerVectorsReturnType +SparseMatrixBase::innerVectors(Index outerStart, Index outerSize) const { return Block(derived(), IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart, diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 4c8965802..4562f3df9 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -467,6 +467,8 @@ class SparseMatrix if(isCompressed()) return; + eigen_internal_assert(m_outerIndex!=0 && m_outerSize>0); + Index oldStart = m_outerIndex[1]; m_outerIndex[1] = m_innerNonZeros[0]; for(Index j=1; j class SparseMatrixBase : public EigenBase const ConstInnerVectorReturnType innerVector(Index outer) const; // set of inner-vectors - Block innerVectors(Index outerStart, Index outerSize); - const Block innerVectors(Index outerStart, Index outerSize) const; + typedef Block InnerVectorsReturnType; + typedef Block ConstInnerVectorsReturnType; + InnerVectorsReturnType innerVectors(Index outerStart, Index outerSize); + const ConstInnerVectorsReturnType innerVectors(Index outerStart, Index outerSize) const; DenseMatrixType toDense() const { diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 46e5fc9d7..4d01a0424 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -100,7 +100,8 @@ add_custom_target(doc ALL COMMAND ${CMAKE_COMMAND} -E copy ${Eigen_BINARY_DIR}/doc/html/group__TopicUnalignedArrayAssert.html ${Eigen_BINARY_DIR}/doc/html/TopicUnalignedArrayAssert.html COMMAND ${CMAKE_COMMAND} -E rename html eigen-doc COMMAND ${CMAKE_COMMAND} -E remove eigen-doc/eigen-doc.tgz - COMMAND ${CMAKE_COMMAND} -E tar cfz eigen-doc/eigen-doc.tgz eigen-doc + COMMAND ${CMAKE_COMMAND} -E tar cfz eigen-doc.tgz eigen-doc + COMMAND ${CMAKE_COMMAND} -E rename eigen-doc.tgz eigen-doc/eigen-doc.tgz COMMAND ${CMAKE_COMMAND} -E rename eigen-doc html WORKING_DIRECTORY ${Eigen_BINARY_DIR}/doc) diff --git a/test/cholesky.cpp b/test/cholesky.cpp index 33e32a322..9335270f4 100644 --- a/test/cholesky.cpp +++ b/test/cholesky.cpp @@ -380,10 +380,14 @@ void test_cholesky() CALL_SUBTEST_3( cholesky_definiteness(Matrix2d()) ); CALL_SUBTEST_4( cholesky(Matrix3f()) ); CALL_SUBTEST_5( cholesky(Matrix4d()) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE); + + s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_2( cholesky(MatrixXd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_6( cholesky_cplx(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } CALL_SUBTEST_4( cholesky_verify_assert() ); @@ -395,6 +399,5 @@ void test_cholesky() CALL_SUBTEST_9( LLT(10) ); CALL_SUBTEST_9( LDLT(10) ); - TEST_SET_BUT_UNUSED_VARIABLE(s) TEST_SET_BUT_UNUSED_VARIABLE(nb_temporaries) } diff --git a/test/eigensolver_complex.cpp b/test/eigensolver_complex.cpp index c9d8c0877..bf8d2deb0 100644 --- a/test/eigensolver_complex.cpp +++ b/test/eigensolver_complex.cpp @@ -108,6 +108,7 @@ void test_eigensolver_complex() CALL_SUBTEST_2( eigensolver(MatrixXcd(s,s)) ); CALL_SUBTEST_3( eigensolver(Matrix, 1, 1>()) ); CALL_SUBTEST_4( eigensolver(Matrix3f()) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } CALL_SUBTEST_1( eigensolver_verify_assert(Matrix4cf()) ); s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); diff --git a/test/eigensolver_generic.cpp b/test/eigensolver_generic.cpp index 92d33f66a..c5441ac4e 100644 --- a/test/eigensolver_generic.cpp +++ b/test/eigensolver_generic.cpp @@ -93,6 +93,7 @@ void test_eigensolver_generic() CALL_SUBTEST_1( eigensolver(Matrix4f()) ); s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_2( eigensolver(MatrixXd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) // some trivial but implementation-wise tricky cases CALL_SUBTEST_2( eigensolver(MatrixXd(1,1)) ); diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp index 935736328..7b0077a6d 100644 --- a/test/eigensolver_selfadjoint.cpp +++ b/test/eigensolver_selfadjoint.cpp @@ -154,15 +154,13 @@ void test_eigensolver_selfadjoint() CALL_SUBTEST_13( selfadjointeigensolver(Matrix3f()) ); CALL_SUBTEST_13( selfadjointeigensolver(Matrix3d()) ); CALL_SUBTEST_2( selfadjointeigensolver(Matrix4d()) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); - CALL_SUBTEST_3( selfadjointeigensolver(MatrixXf(s,s)) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); - CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(s,s)) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); - CALL_SUBTEST_5( selfadjointeigensolver(MatrixXcd(s,s)) ); s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); + CALL_SUBTEST_3( selfadjointeigensolver(MatrixXf(s,s)) ); + CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(s,s)) ); + CALL_SUBTEST_5( selfadjointeigensolver(MatrixXcd(s,s)) ); CALL_SUBTEST_9( selfadjointeigensolver(Matrix,Dynamic,Dynamic,RowMajor>(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) // some trivial but implementation-wise tricky cases CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(1,1)) ); diff --git a/test/inverse.cpp b/test/inverse.cpp index 1e7b20958..b09989aca 100644 --- a/test/inverse.cpp +++ b/test/inverse.cpp @@ -102,12 +102,16 @@ void test_inverse() CALL_SUBTEST_3( inverse(Matrix3f()) ); CALL_SUBTEST_4( inverse(Matrix4f()) ); CALL_SUBTEST_4( inverse(Matrix()) ); + s = internal::random(50,320); CALL_SUBTEST_5( inverse(MatrixXf(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(25,100); CALL_SUBTEST_6( inverse(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + CALL_SUBTEST_7( inverse(Matrix4d()) ); CALL_SUBTEST_7( inverse(Matrix()) ); } - TEST_SET_BUT_UNUSED_VARIABLE(s) } diff --git a/test/product_large.cpp b/test/product_large.cpp index ffb8b7bf2..84c489580 100644 --- a/test/product_large.cpp +++ b/test/product_large.cpp @@ -64,8 +64,7 @@ void test_product_large() #endif // Regression test for bug 714: -#ifdef EIGEN_HAS_OPENMP - std::cout << "Testing omp_set_dynamic(1)\n"; +#if defined EIGEN_HAS_OPENMP omp_set_dynamic(1); for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_6( product(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp index 805cc8939..898f1d1cb 100644 --- a/test/product_notemporary.cpp +++ b/test/product_notemporary.cpp @@ -129,11 +129,12 @@ void test_product_notemporary() for(int i = 0; i < g_repeat; i++) { s = internal::random(16,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_1( product_notemporary(MatrixXf(s, s)) ); - s = internal::random(16,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_2( product_notemporary(MatrixXd(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(16,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_3( product_notemporary(MatrixXcf(s,s)) ); - s = internal::random(16,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_4( product_notemporary(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } } diff --git a/test/product_selfadjoint.cpp b/test/product_selfadjoint.cpp index 374e2393b..3d768aa7e 100644 --- a/test/product_selfadjoint.cpp +++ b/test/product_selfadjoint.cpp @@ -67,14 +67,21 @@ void test_product_selfadjoint() CALL_SUBTEST_1( product_selfadjoint(Matrix()) ); CALL_SUBTEST_2( product_selfadjoint(Matrix()) ); CALL_SUBTEST_3( product_selfadjoint(Matrix3d()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_4( product_selfadjoint(MatrixXcf(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_5( product_selfadjoint(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_6( product_selfadjoint(MatrixXd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_7( product_selfadjoint(Matrix(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } - TEST_SET_BUT_UNUSED_VARIABLE(s) } diff --git a/test/product_syrk.cpp b/test/product_syrk.cpp index 73c95000c..e10f0f2f2 100644 --- a/test/product_syrk.cpp +++ b/test/product_syrk.cpp @@ -125,11 +125,12 @@ void test_product_syrk() int s; s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_1( syrk(MatrixXf(s, s)) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_2( syrk(MatrixXd(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_3( syrk(MatrixXcf(s, s)) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_4( syrk(MatrixXcd(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } } diff --git a/test/product_trmv.cpp b/test/product_trmv.cpp index 4c3c435c2..57a202afc 100644 --- a/test/product_trmv.cpp +++ b/test/product_trmv.cpp @@ -78,12 +78,14 @@ void test_product_trmv() CALL_SUBTEST_1( trmv(Matrix()) ); CALL_SUBTEST_2( trmv(Matrix()) ); CALL_SUBTEST_3( trmv(Matrix3d()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_4( trmv(MatrixXcf(s,s)) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_5( trmv(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_6( trmv(Matrix(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } - TEST_SET_BUT_UNUSED_VARIABLE(s); } diff --git a/test/sizeoverflow.cpp b/test/sizeoverflow.cpp index 16d6f8d04..240d22294 100644 --- a/test/sizeoverflow.cpp +++ b/test/sizeoverflow.cpp @@ -18,8 +18,6 @@ VERIFY(threw && "should have thrown bad_alloc: " #a); \ } -typedef DenseIndex Index; - template void triggerMatrixBadAlloc(Index rows, Index cols) {