From 0a8e4712d1f3cfe8830783f59c6c9987ea34701c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 2 Jul 2014 16:13:05 +0200 Subject: [PATCH 01/31] Do not attempt to include on Windows CE --- Eigen/Core | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/Core b/Eigen/Core index a135afc6a..16e388fa4 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -205,7 +205,7 @@ #endif // required for __cpuid, needs to be included after cmath -#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) +#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) && (!defined(_WIN32_WCE)) #include #endif From 998455a5707e1312d3058672d6d337a20158d86e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 2 Jul 2014 23:04:46 +0200 Subject: [PATCH 02/31] LDLT is not rank-revealing, so we should not attempt to use the biggest diagonal elements as thresholds. --- Eigen/src/Cholesky/LDLT.h | 27 +++++++++++++-------------- test/cholesky.cpp | 20 ++++++++++++++++++++ 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index 1d84438e2..67a99a0af 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -258,23 +258,13 @@ template<> struct ldlt_inplace return true; } - RealScalar cutoff(0), biggest_in_corner; - for (Index k = 0; k < size; ++k) { // Find largest diagonal element Index index_of_biggest_in_corner; - biggest_in_corner = mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); + mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); index_of_biggest_in_corner += k; - if(k == 0) - { - // The biggest overall is the point of reference to which further diagonals - // are compared; if any diagonal is negligible compared - // to the largest overall, the algorithm bails. - cutoff = abs(NumTraits::epsilon() * biggest_in_corner); - } - transpositions.coeffRef(k) = index_of_biggest_in_corner; if(k != index_of_biggest_in_corner) { @@ -311,7 +301,11 @@ template<> struct ldlt_inplace A21.noalias() -= A20 * temp.head(k); } - if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff)) + // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot + // was smaller than the cutoff value. However, soince LDLT is not rank-revealing + // we should only make sure we do not introduce INF or NaN values. + // LAPACK also uses 0 as the cutoff value. + if((rs>0) && (abs(mat.coeffRef(k,k)) > RealScalar(0))) A21 /= mat.coeffRef(k,k); RealScalar realAkk = numext::real(mat.coeffRef(k,k)); @@ -495,8 +489,13 @@ struct solve_retval, Rhs> typedef typename LDLTType::Scalar Scalar; typedef typename LDLTType::RealScalar RealScalar; const Diagonal vectorD = dec().vectorD(); - RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits::epsilon(), - RealScalar(1) / NumTraits::highest()); // motivated by LAPACK's xGELSS + // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon + // as motivated by LAPACK's xGELSS: + // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); + // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest + // diagonal element is not well justified and to numerical issues in some cases. + // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. + RealScalar tolerance = RealScalar(1) / NumTraits::highest(); for (Index i = 0; i < vectorD.size(); ++i) { if(abs(vectorD(i)) > tolerance) dst.row(i) /= vectorD(i); diff --git a/test/cholesky.cpp b/test/cholesky.cpp index 569318f83..9aa7da7bb 100644 --- a/test/cholesky.cpp +++ b/test/cholesky.cpp @@ -68,6 +68,7 @@ template void cholesky(const MatrixType& m) Index cols = m.cols(); typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; typedef Matrix SquareMatrixType; typedef Matrix VectorType; @@ -207,6 +208,25 @@ template void cholesky(const MatrixType& m) vecX = ldltlo.solve(vecB); VERIFY_IS_APPROX(A * vecX, vecB); } + + // check matrices with wide spectrum + if(rows>=3) + { + RealScalar s = (std::min)(16,std::numeric_limits::max_exponent10/8); + Matrix a = Matrix::Random(rows,rows); + Matrix d = Matrix::Random(rows); + for(int k=0; k(-s,s)); + SquareMatrixType A = a * d.asDiagonal() * a.adjoint(); + // Make sure a solution exists: + vecX.setRandom(); + vecB = A * vecX; + vecX.setZero(); + ldltlo.compute(A); + VERIFY_IS_APPROX(A, ldltlo.reconstructedMatrix()); + vecX = ldltlo.solve(vecB); + VERIFY_IS_APPROX(A * vecX, vecB); + } } // update/downdate From 3a9f9faada5de3a13611d7543014c5ef503eff58 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Fri, 4 Jul 2014 12:48:24 +0200 Subject: [PATCH 03/31] Fix unused typedef warning --- Eigen/src/Cholesky/LDLT.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index 67a99a0af..ef81030f8 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -486,7 +486,6 @@ struct solve_retval, Rhs> using std::abs; EIGEN_USING_STD_MATH(max); typedef typename LDLTType::MatrixType MatrixType; - typedef typename LDLTType::Scalar Scalar; typedef typename LDLTType::RealScalar RealScalar; const Diagonal vectorD = dec().vectorD(); // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon From f36538049669a7efee57f2b1e3c60bf8bf3976bb Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Fri, 4 Jul 2014 12:52:55 +0200 Subject: [PATCH 04/31] Fix regression introduced by 3117036b80075390dbc46f60aa0d595e5a44661b : Matrix(int) did not compile if Scalar is not constructible from int. Now this falls back to the (Index size) constructor. --- Eigen/src/Core/PlainObjectBase.h | 4 ++-- Eigen/src/Core/util/Meta.h | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index b5b25ef70..ae5b342ce 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -692,7 +692,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if::type* = 0) + EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if::value,T>::type* = 0) { EIGEN_STATIC_ASSERT(bool(NumTraits::IsInteger), FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) @@ -700,7 +700,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type } template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if::type* = 0) + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if::value,T>::type* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) m_storage.data()[0] = val0; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index e4e4d4a87..795197f59 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -80,6 +80,25 @@ template struct add_const_on_value_type { typedef T const template struct add_const_on_value_type { typedef T const* const type; }; template struct add_const_on_value_type { typedef T const* const type; }; + +template +struct is_convertible +{ +private: + struct yes {int a[1];}; + struct no {int a[2];}; + + template + static yes test (const T&) {} + + template static no test (...) {} + +public: + static From ms_from; + enum { value = sizeof(test(ms_from))==sizeof(yes) }; +}; + + /** \internal Allows to enable/disable an overload * according to a compile time condition. */ From 8ee38d2db69aa7245d4aa1c5cb6a177a866da20f Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 5 Jul 2014 21:48:48 +0800 Subject: [PATCH 05/31] Fix dox for namespaces --- Eigen/src/Jacobi/Jacobi.h | 2 +- unsupported/Eigen/src/IterativeSolvers/Scaling.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h index 956f72d57..da9fb53d0 100644 --- a/Eigen/src/Jacobi/Jacobi.h +++ b/Eigen/src/Jacobi/Jacobi.h @@ -255,13 +255,13 @@ void JacobiRotation::makeGivens(const Scalar& p, const Scalar& q, Scalar * Implementation of MatrixBase methods ****************************************************************************************/ +namespace internal { /** \jacobi_module * Applies the clock wise 2D rotation \a j to the set of 2D vectors of cordinates \a x and \a y: * \f$ \left ( \begin{array}{cc} x \\ y \end{array} \right ) = J \left ( \begin{array}{cc} x \\ y \end{array} \right ) \f$ * * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight() */ -namespace internal { template void apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, const JacobiRotation& j); } diff --git a/unsupported/Eigen/src/IterativeSolvers/Scaling.h b/unsupported/Eigen/src/IterativeSolvers/Scaling.h index 4fd439202..9189fddf1 100644 --- a/unsupported/Eigen/src/IterativeSolvers/Scaling.h +++ b/unsupported/Eigen/src/IterativeSolvers/Scaling.h @@ -9,6 +9,10 @@ #ifndef EIGEN_ITERSCALING_H #define EIGEN_ITERSCALING_H + +namespace Eigen { +using std::abs; + /** * \ingroup IterativeSolvers_Module * \brief iterative scaling algorithm to equilibrate rows and column norms in matrices @@ -41,8 +45,6 @@ * * \sa \ref IncompleteLUT */ -namespace Eigen { -using std::abs; template class IterScaling { From 1a817d3b7090c0cecd648003e0b24d434967d36e Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 5 Jul 2014 21:50:05 +0800 Subject: [PATCH 06/31] Document internal namespace --- Eigen/Core | 3 +++ unsupported/Eigen/doxygen.hpp | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 unsupported/Eigen/doxygen.hpp diff --git a/Eigen/Core b/Eigen/Core index 16e388fa4..3f1efb538 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -220,6 +220,9 @@ /** \brief Namespace containing all symbols from the %Eigen library. */ namespace Eigen { +/** \brief Namespace containing low-level routines from the %Eigen library. */ +namespace internal {} + inline static const char *SimdInstructionSetsInUse(void) { #if defined(EIGEN_VECTORIZE_AVX) return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; diff --git a/unsupported/Eigen/doxygen.hpp b/unsupported/Eigen/doxygen.hpp new file mode 100644 index 000000000..031488527 --- /dev/null +++ b/unsupported/Eigen/doxygen.hpp @@ -0,0 +1,19 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// As its name suggests, this file is intended to be read by Doxygen. +// There is nothing defined so we need no #include guard. + +//! \brief Namespace containing all symbols from the %Eigen library. +namespace Eigen { + +//! \brief Namespace containing low-level routines from the %Eigen library. +namespace internal {} + +} From 7a915f68467df10fb321b4bfa6c139669b00daaa Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 5 Jul 2014 22:41:58 +0800 Subject: [PATCH 07/31] Move Doxygen-only stuff to *.dox --- Eigen/Core | 3 --- doc/Manual.dox | 3 +++ unsupported/Eigen/doxygen.hpp | 19 ------------------- unsupported/doc/Overview.dox | 3 +++ 4 files changed, 6 insertions(+), 22 deletions(-) delete mode 100644 unsupported/Eigen/doxygen.hpp diff --git a/Eigen/Core b/Eigen/Core index 3f1efb538..16e388fa4 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -220,9 +220,6 @@ /** \brief Namespace containing all symbols from the %Eigen library. */ namespace Eigen { -/** \brief Namespace containing low-level routines from the %Eigen library. */ -namespace internal {} - inline static const char *SimdInstructionSetsInUse(void) { #if defined(EIGEN_VECTORIZE_AVX) return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; diff --git a/doc/Manual.dox b/doc/Manual.dox index 0cbe792d5..d6d4d4689 100644 --- a/doc/Manual.dox +++ b/doc/Manual.dox @@ -159,4 +159,7 @@ namespace Eigen { \ingroup Geometry_Reference */ /** \addtogroup Splines_Module \ingroup Geometry_Reference */ + +/** \brief Namespace containing low-level routines from the %Eigen library. */ +namespace internal {} } diff --git a/unsupported/Eigen/doxygen.hpp b/unsupported/Eigen/doxygen.hpp deleted file mode 100644 index 031488527..000000000 --- a/unsupported/Eigen/doxygen.hpp +++ /dev/null @@ -1,19 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Chen-Pang He -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// As its name suggests, this file is intended to be read by Doxygen. -// There is nothing defined so we need no #include guard. - -//! \brief Namespace containing all symbols from the %Eigen library. -namespace Eigen { - -//! \brief Namespace containing low-level routines from the %Eigen library. -namespace internal {} - -} diff --git a/unsupported/doc/Overview.dox b/unsupported/doc/Overview.dox index d048377df..93b2a8927 100644 --- a/unsupported/doc/Overview.dox +++ b/unsupported/doc/Overview.dox @@ -1,3 +1,4 @@ +/// \brief Namespace containing all symbols from the %Eigen library. namespace Eigen { /** \mainpage Eigen's unsupported modules @@ -22,4 +23,6 @@ subject to be included in Eigen in the future. */ +/// \brief Namespace containing low-level routines from the %Eigen library. +namespace internal {} } From 4860da2de12cd940d506db0f20e39ef2cb7266ba Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 5 Jul 2014 23:01:27 +0800 Subject: [PATCH 08/31] Percent "Eigen" in dox to prevent linking if not referring to the Eigen namespace --- unsupported/doc/Overview.dox | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unsupported/doc/Overview.dox b/unsupported/doc/Overview.dox index 93b2a8927..ef361b35b 100644 --- a/unsupported/doc/Overview.dox +++ b/unsupported/doc/Overview.dox @@ -1,15 +1,15 @@ /// \brief Namespace containing all symbols from the %Eigen library. namespace Eigen { -/** \mainpage Eigen's unsupported modules +/** \mainpage %Eigen's unsupported modules -This is the API documentation for Eigen's unsupported modules. +This is the API documentation for %Eigen's unsupported modules. These modules are contributions from various users. They are provided "as is", without any support. Click on the \e Modules tab at the top of this page to get a list of all unsupported modules. -Don't miss the official Eigen documentation. +Don't miss the official Eigen documentation. */ @@ -19,7 +19,7 @@ Don't miss the official Eigen documentation. The unsupported modules are contributions from various users. They are provided "as is", without any support. Nevertheless, some of them are -subject to be included in Eigen in the future. +subject to be included in %Eigen in the future. */ From 85777fc131ca8e22305ac4da41e4f18a870c2797 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sun, 6 Jul 2014 13:45:54 +0800 Subject: [PATCH 09/31] Mark internal namespace as \internal --- doc/Manual.dox | 2 +- unsupported/doc/Overview.dox | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/Manual.dox b/doc/Manual.dox index d6d4d4689..43af857a5 100644 --- a/doc/Manual.dox +++ b/doc/Manual.dox @@ -160,6 +160,6 @@ namespace Eigen { /** \addtogroup Splines_Module \ingroup Geometry_Reference */ -/** \brief Namespace containing low-level routines from the %Eigen library. */ +/** \internal \brief Namespace containing low-level routines from the %Eigen library. */ namespace internal {} } diff --git a/unsupported/doc/Overview.dox b/unsupported/doc/Overview.dox index ef361b35b..45464a545 100644 --- a/unsupported/doc/Overview.dox +++ b/unsupported/doc/Overview.dox @@ -23,6 +23,6 @@ subject to be included in %Eigen in the future. */ -/// \brief Namespace containing low-level routines from the %Eigen library. +/// \internal \brief Namespace containing low-level routines from the %Eigen library. namespace internal {} } From 2bf58316ee120e52a193d69104c57781f94ff6a5 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sun, 6 Jul 2014 13:49:43 +0800 Subject: [PATCH 10/31] Fix dox at internal::tridiagonal_qr_step --- Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index d10eba201..fc8ecaa6f 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -355,6 +355,7 @@ template class SelfAdjointEigenSolver bool m_eigenvectorsOk; }; +namespace internal { /** \internal * * \eigenvalues_module \ingroup Eigenvalues_Module @@ -371,7 +372,6 @@ template class SelfAdjointEigenSolver * Implemented from Golub's "Matrix Computations", algorithm 8.3.2: * "implicit symmetric QR step with Wilkinson shift" */ -namespace internal { template EIGEN_DEVICE_FUNC static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n); From b9ee880f0786f12701b32ac5b49ff58141d207f6 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Mon, 7 Jul 2014 21:28:00 +0800 Subject: [PATCH 11/31] chmod -x Eigen/src/Core/GenericPacketMath.h --- Eigen/src/Core/GenericPacketMath.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 Eigen/src/Core/GenericPacketMath.h diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h old mode 100755 new mode 100644 From e4b6979334312eefeb63eb7941b48cd95ce7d343 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Mon, 7 Jul 2014 21:32:33 +0800 Subject: [PATCH 12/31] Find OpenBLAS more aggressively. This made a difference on Fedora 20 --- bench/btl/cmake/FindOPENBLAS.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/btl/cmake/FindOPENBLAS.cmake b/bench/btl/cmake/FindOPENBLAS.cmake index c76fc251c..2a0919436 100644 --- a/bench/btl/cmake/FindOPENBLAS.cmake +++ b/bench/btl/cmake/FindOPENBLAS.cmake @@ -3,7 +3,7 @@ if (OPENBLAS_LIBRARIES) set(OPENBLAS_FIND_QUIETLY TRUE) endif (OPENBLAS_LIBRARIES) -find_file(OPENBLAS_LIBRARIES libopenblas.so PATHS /usr/lib $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR}) +find_file(OPENBLAS_LIBRARIES NAMES libopenblas.so libopenblas.so.0 PATHS /usr/lib /usr/lib64 $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR}) find_library(OPENBLAS_LIBRARIES openblas PATHS $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR}) if(OPENBLAS_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX) From 1eefa5a84192da236df601f067eb90e5bd5a9379 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Mon, 7 Jul 2014 22:55:28 +0800 Subject: [PATCH 13/31] Find benchmark opponents also in /usr/lib64 --- bench/btl/cmake/FindACML.cmake | 2 ++ bench/btl/cmake/FindATLAS.cmake | 13 ++++--------- bench/btl/cmake/FindCBLAS.cmake | 1 + 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/bench/btl/cmake/FindACML.cmake b/bench/btl/cmake/FindACML.cmake index f45ae1b0d..4989fa2f4 100644 --- a/bench/btl/cmake/FindACML.cmake +++ b/bench/btl/cmake/FindACML.cmake @@ -17,6 +17,7 @@ find_file(ACML_LIBRARIES libacml_mp.so PATHS /usr/lib + /usr/lib64 $ENV{ACMLDIR}/lib ${LIB_INSTALL_DIR} ) @@ -35,6 +36,7 @@ if(NOT ACML_LIBRARIES) libacml.so libacml_mv.so PATHS /usr/lib + /usr/lib64 $ENV{ACMLDIR}/lib ${LIB_INSTALL_DIR} ) diff --git a/bench/btl/cmake/FindATLAS.cmake b/bench/btl/cmake/FindATLAS.cmake index 14b1dee09..4136a989d 100644 --- a/bench/btl/cmake/FindATLAS.cmake +++ b/bench/btl/cmake/FindATLAS.cmake @@ -3,18 +3,13 @@ if (ATLAS_LIBRARIES) set(ATLAS_FIND_QUIETLY TRUE) endif (ATLAS_LIBRARIES) -find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_library(ATLAS_LIB satlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_file(ATLAS_LAPACK NAMES liblapack_atlas.so.3 liblapack.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_LAPACK NAMES lapack_atlas lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -if(NOT ATLAS_LAPACK) - find_file(ATLAS_LAPACK liblapack.so.3 PATHS /usr/lib/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) - find_library(ATLAS_LAPACK lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -endif(NOT ATLAS_LAPACK) - -find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) diff --git a/bench/btl/cmake/FindCBLAS.cmake b/bench/btl/cmake/FindCBLAS.cmake index 554f0291b..ce0f2f2b2 100644 --- a/bench/btl/cmake/FindCBLAS.cmake +++ b/bench/btl/cmake/FindCBLAS.cmake @@ -23,6 +23,7 @@ find_file(CBLAS_LIBRARIES libcblas.so.3 PATHS /usr/lib + /usr/lib64 $ENV{CBLASDIR}/lib ${LIB_INSTALL_DIR} ) From 0dfb73d46ada6a9749a24a946186c8a8c2472dc5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Jul 2014 10:04:27 +0200 Subject: [PATCH 15/31] Fix LDLT with semi-definite complex matrices: owing to round-off errors, the diagonal was not real. Also exploit the fact that the diagonal is real in the rest of LDLT --- Eigen/src/Cholesky/LDLT.h | 12 ++++++------ test/cholesky.cpp | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index ef81030f8..6881e1ca8 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -295,7 +295,7 @@ template<> struct ldlt_inplace if(k>0) { - temp.head(k) = mat.diagonal().head(k).asDiagonal() * A10.adjoint(); + temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint(); mat.coeffRef(k,k) -= (A10 * temp.head(k)).value(); if(rs>0) A21.noalias() -= A20 * temp.head(k); @@ -305,10 +305,10 @@ template<> struct ldlt_inplace // was smaller than the cutoff value. However, soince LDLT is not rank-revealing // we should only make sure we do not introduce INF or NaN values. // LAPACK also uses 0 as the cutoff value. - if((rs>0) && (abs(mat.coeffRef(k,k)) > RealScalar(0))) - A21 /= mat.coeffRef(k,k); - RealScalar realAkk = numext::real(mat.coeffRef(k,k)); + if((rs>0) && (abs(realAkk) > RealScalar(0))) + A21 /= realAkk; + if (sign == PositiveSemiDef) { if (realAkk < 0) sign = Indefinite; } else if (sign == NegativeSemiDef) { @@ -487,7 +487,7 @@ struct solve_retval, Rhs> EIGEN_USING_STD_MATH(max); typedef typename LDLTType::MatrixType MatrixType; typedef typename LDLTType::RealScalar RealScalar; - const Diagonal vectorD = dec().vectorD(); + const typename Diagonal::RealReturnType vectorD(dec().vectorD()); // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon // as motivated by LAPACK's xGELSS: // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); @@ -552,7 +552,7 @@ MatrixType LDLT::reconstructedMatrix() const // L^* P res = matrixU() * res; // D(L^*P) - res = vectorD().asDiagonal() * res; + res = vectorD().real().asDiagonal() * res; // L(DL^*P) res = matrixL() * res; // P^T (LDL^*P) diff --git a/test/cholesky.cpp b/test/cholesky.cpp index 9aa7da7bb..1d147bd7a 100644 --- a/test/cholesky.cpp +++ b/test/cholesky.cpp @@ -209,7 +209,7 @@ template void cholesky(const MatrixType& m) VERIFY_IS_APPROX(A * vecX, vecB); } - // check matrices with wide spectrum + // check matrices with a wide spectrum if(rows>=3) { RealScalar s = (std::min)(16,std::numeric_limits::max_exponent10/8); From 904509fbb664b3165531e6e4d7234d0765425ad9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Jul 2014 10:28:09 +0200 Subject: [PATCH 16/31] Move using std::abs from Eigen's namespace to function scope. --- unsupported/Eigen/src/IterativeSolvers/Scaling.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/Scaling.h b/unsupported/Eigen/src/IterativeSolvers/Scaling.h index 9189fddf1..d113e6e90 100644 --- a/unsupported/Eigen/src/IterativeSolvers/Scaling.h +++ b/unsupported/Eigen/src/IterativeSolvers/Scaling.h @@ -11,7 +11,6 @@ #define EIGEN_ITERSCALING_H namespace Eigen { -using std::abs; /** * \ingroup IterativeSolvers_Module @@ -73,6 +72,7 @@ class IterScaling */ void compute (const MatrixType& mat) { + using std::abs; int m = mat.rows(); int n = mat.cols(); eigen_assert((m>0 && m == n) && "Please give a non - empty matrix"); From e25e6748521818fb13270f7351df8538eb54a056 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Tue, 8 Jul 2014 13:57:26 +0200 Subject: [PATCH 17/31] bug #837: Always re-align the result of EIGEN_ALLOCA. --- Eigen/src/Core/util/Memory.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 9718ef6a8..dd9285714 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -607,12 +607,9 @@ template class aligned_stack_memory_handler * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token. */ #ifdef EIGEN_ALLOCA - // The native alloca() that comes with llvm aligns buffer on 16 bytes even when AVX is enabled. -#if defined(__arm__) || defined(_WIN32) || EIGEN_ALIGN_BYTES > 16 - #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast((reinterpret_cast(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES)) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES) - #else - #define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA - #endif + // We always manually re-align the result of EIGEN_ALLOCA. + // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. + #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast((reinterpret_cast(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES-1)) + EIGEN_ALIGN_BYTES-1) & ~(size_t(EIGEN_ALIGN_BYTES-1))) #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ Eigen::internal::check_size_for_overflow(SIZE); \ From b47ef1431f2e4d1218253df179b593cc5c269aa7 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Jul 2014 16:47:11 +0200 Subject: [PATCH 18/31] Fix many long to int implicit conversions --- Eigen/src/Core/GeneralProduct.h | 4 +- Eigen/src/Core/GenericPacketMath.h | 4 +- Eigen/src/Core/Visitor.h | 2 +- Eigen/src/Core/arch/AVX/Complex.h | 4 +- Eigen/src/Core/arch/AVX/PacketMath.h | 8 ++-- Eigen/src/Core/arch/AltiVec/Complex.h | 4 +- Eigen/src/Core/arch/AltiVec/PacketMath.h | 8 ++-- Eigen/src/Core/arch/NEON/Complex.h | 4 +- Eigen/src/Core/arch/NEON/PacketMath.h | 8 ++-- Eigen/src/Core/arch/SSE/Complex.h | 4 +- Eigen/src/Core/arch/SSE/PacketMath.h | 12 +++--- .../Core/products/SelfadjointMatrixVector.h | 4 +- .../Core/products/TriangularMatrixVector.h | 2 +- Eigen/src/SparseCore/SparseBlock.h | 40 ++++++++++--------- Eigen/src/SparseCore/TriangularSolver.h | 30 ++++++++------ test/cholesky.cpp | 6 +-- test/mapstaticmethods.cpp | 6 ++- test/product_notemporary.cpp | 5 +-- test/ref.cpp | 5 +-- test/sparse.h | 4 +- test/sparse_basic.cpp | 8 ++-- 21 files changed, 89 insertions(+), 83 deletions(-) diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 229d12c3f..624b8b6e8 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -445,7 +445,7 @@ template<> struct gemv_selector if(!evalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = dest.size(); + Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif if(!alphaIsCompatible) @@ -510,7 +510,7 @@ template<> struct gemv_selector if(!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = actualRhs.size(); + Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif Map(actualRhsPtr, actualRhs.size()) = actualRhs; diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 4523d2263..a1fcb82fb 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -233,10 +233,10 @@ template EIGEN_DEVICE_FUNC inline void pstore( template EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) { (*to) = from; } - template EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, int /*stride*/) + template EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, DenseIndex /*stride*/) { return ploadu(from); } - template EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, int /*stride*/) + template EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, DenseIndex /*stride*/) { pstore(to, from); } /** \internal tries to do cache prefetching of \a addr */ diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index 64867b7a2..6f4b9ec35 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -194,7 +194,7 @@ DenseBase::minCoeff(IndexType* index) const EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) internal::min_coeff_visitor minVisitor; this->visit(minVisitor); - *index = (RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row; + *index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row); return minVisitor.res; } diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index 9ced85132..b3cf7bb26 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -92,7 +92,7 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploaddup(const std::complex< template<> EIGEN_STRONG_INLINE void pstore >(std::complex* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather, Packet4cf>(const std::complex* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather, Packet4cf>(const std::complex* from, DenseIndex stride) { return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]), std::imag(from[2*stride]), std::real(from[2*stride]), @@ -100,7 +100,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather, Packe std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet4cf>(std::complex* to, const Packet4cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet4cf>(std::complex* to, const Packet4cf& from, DenseIndex stride) { __m128 low = _mm256_extractf128_ps(from.v, 0); to[stride*0] = std::complex(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)), diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 8b8307d75..66b97bd69 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -224,17 +224,17 @@ template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet8i& // NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available // NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4); -template<> EIGEN_DEVICE_FUNC inline Packet8f pgather(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet8f pgather(const float* from, DenseIndex stride) { return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride], from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet4d pgather(const double* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4d pgather(const double* from, DenseIndex stride) { return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet8f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet8f& from, DenseIndex stride) { __m128 low = _mm256_extractf128_ps(from, 0); to[stride*0] = _mm_cvtss_f32(low); @@ -248,7 +248,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, co to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)); to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const Packet4d& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const Packet4d& from, DenseIndex stride) { __m128d low = _mm256_extractf128_pd(from, 0); to[stride*0] = _mm_cvtsd_f64(low); diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 5409ddedd..73fa62643 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -68,14 +68,14 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, DenseIndex stride) { std::complex EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return Packet2cf(vec_ld(0, (const float*)af)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, DenseIndex stride) { std::complex EIGEN_ALIGN16 af[2]; vec_st(from.v, 0, (float*)af); diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 0e9adf450..aadfc17be 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -190,7 +190,7 @@ pbroadcast4(const int *a, a3 = vec_splat(a3, 3); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, DenseIndex stride) { float EIGEN_ALIGN16 af[4]; af[0] = from[0*stride]; @@ -199,7 +199,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const floa af[3] = from[3*stride]; return vec_ld(0, af); } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, DenseIndex stride) { int EIGEN_ALIGN16 ai[4]; ai[0] = from[0*stride]; @@ -208,7 +208,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* f ai[3] = from[3*stride]; return vec_ld(0, ai); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet4f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet4f& from, DenseIndex stride) { float EIGEN_ALIGN16 af[4]; vec_st(from, 0, af); @@ -217,7 +217,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, co to[2*stride] = af[2]; to[3*stride] = af[3]; } -template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, DenseIndex stride) { int EIGEN_ALIGN16 ai[4]; vec_st(from, 0, ai); diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 259f2e7b8..42e7733d7 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -111,7 +111,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex< template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, DenseIndex stride) { Packet4f res; res = vsetq_lane_f32(std::real(from[0*stride]), res, 0); @@ -121,7 +121,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packe return Packet2cf(res); } -template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, DenseIndex stride) { to[stride*0] = std::complex(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1)); to[stride*1] = std::complex(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3)); diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index e5eb06f36..380b76ae9 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -222,7 +222,7 @@ template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& f template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, DenseIndex stride) { Packet4f res; res = vsetq_lane_f32(from[0*stride], res, 0); @@ -231,7 +231,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const floa res = vsetq_lane_f32(from[3*stride], res, 3); return res; } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, DenseIndex stride) { Packet4i res; res = vsetq_lane_s32(from[0*stride], res, 0); @@ -241,14 +241,14 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* f return res; } -template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet4f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet4f& from, DenseIndex stride) { to[stride*0] = vgetq_lane_f32(from, 0); to[stride*1] = vgetq_lane_f32(from, 1); to[stride*2] = vgetq_lane_f32(from, 2); to[stride*3] = vgetq_lane_f32(from, 3); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, DenseIndex stride) { to[stride*0] = vgetq_lane_s32(from, 0); to[stride*1] = vgetq_lane_s32(from, 1); diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 758183c18..414c4cb6a 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -114,13 +114,13 @@ template<> EIGEN_STRONG_INLINE void pstore >(std::complex EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, DenseIndex stride) { return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]), std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, DenseIndex stride) { to[stride*0] = std::complex(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)), _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1))); diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 6912f3bc3..380afe77c 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -383,32 +383,32 @@ template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast(to), Packet2d(_mm_castps_pd(from))); } template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast(to), Packet2d(_mm_castsi128_pd(from))); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, DenseIndex stride) { return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet2d pgather(const double* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2d pgather(const double* from, DenseIndex stride) { return _mm_set_pd(from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, DenseIndex stride) { return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet4f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet4f& from, DenseIndex stride) { to[stride*0] = _mm_cvtss_f32(from); to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1)); to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2)); to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const Packet2d& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const Packet2d& from, DenseIndex stride) { to[stride*0] = _mm_cvtsd_f64(from); to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, DenseIndex stride) { to[stride*0] = _mm_cvtsi128_si32(from); to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1)); diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index fdc81205a..26e787949 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -218,7 +218,7 @@ struct SelfadjointProductMatrix if(!EvalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = dest.size(); + Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif MappedDest(actualDestPtr, dest.size()) = dest; @@ -227,7 +227,7 @@ struct SelfadjointProductMatrix if(!UseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = rhs.size(); + Index size = rhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif Map(actualRhsPtr, rhs.size()) = rhs; diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index 6117d5a82..817768481 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -322,7 +322,7 @@ template<> struct trmv_selector if(!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = actualRhs.size(); + Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif Map(actualRhsPtr, actualRhs.size()) = actualRhs; diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 5b95cc33f..cbcd7a299 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -50,11 +50,11 @@ public: Index m_outer; }; - inline BlockImpl(const XprType& xpr, int i) + inline BlockImpl(const XprType& xpr, Index i) : m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize) {} - inline BlockImpl(const XprType& xpr, int startRow, int startCol, int blockRows, int blockCols) + inline BlockImpl(const XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) : m_matrix(xpr), m_outerStart(IsRowMajor ? startRow : startCol), m_outerSize(IsRowMajor ? blockRows : blockCols) {} @@ -65,7 +65,7 @@ public: { Index nnz = 0; Index end = m_outerStart + m_outerSize.value(); - for(int j=m_outerStart; j,BlockRows,BlockCols,true : public internal::sparse_matrix_block_impl,BlockRows,BlockCols> { public: + typedef _Index Index; typedef SparseMatrix<_Scalar, _Options, _Index> SparseMatrixType; typedef internal::sparse_matrix_block_impl Base; - inline BlockImpl(SparseMatrixType& xpr, int i) + inline BlockImpl(SparseMatrixType& xpr, Index i) : Base(xpr, i) {} - inline BlockImpl(SparseMatrixType& xpr, int startRow, int startCol, int blockRows, int blockCols) + inline BlockImpl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) : Base(xpr, startRow, startCol, blockRows, blockCols) {} @@ -282,13 +283,14 @@ class BlockImpl,BlockRows,BlockCol : public internal::sparse_matrix_block_impl,BlockRows,BlockCols> { public: + typedef _Index Index; typedef const SparseMatrix<_Scalar, _Options, _Index> SparseMatrixType; typedef internal::sparse_matrix_block_impl Base; - inline BlockImpl(SparseMatrixType& xpr, int i) + inline BlockImpl(SparseMatrixType& xpr, Index i) : Base(xpr, i) {} - inline BlockImpl(SparseMatrixType& xpr, int startRow, int startCol, int blockRows, int blockCols) + inline BlockImpl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) : Base(xpr, startRow, startCol, blockRows, blockCols) {} @@ -362,7 +364,7 @@ public: /** Column or Row constructor */ - inline BlockImpl(const XprType& xpr, int i) + inline BlockImpl(const XprType& xpr, Index i) : m_matrix(xpr), m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0), m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0), @@ -372,32 +374,32 @@ public: /** Dynamic-size constructor */ - inline BlockImpl(const XprType& xpr, int startRow, int startCol, int blockRows, int blockCols) + inline BlockImpl(const XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) : m_matrix(xpr), m_startRow(startRow), m_startCol(startCol), m_blockRows(blockRows), m_blockCols(blockCols) {} - inline int rows() const { return m_blockRows.value(); } - inline int cols() const { return m_blockCols.value(); } + inline Index rows() const { return m_blockRows.value(); } + inline Index cols() const { return m_blockCols.value(); } - inline Scalar& coeffRef(int row, int col) + inline Scalar& coeffRef(Index row, Index col) { return m_matrix.const_cast_derived() .coeffRef(row + m_startRow.value(), col + m_startCol.value()); } - inline const Scalar coeff(int row, int col) const + inline const Scalar coeff(Index row, Index col) const { return m_matrix.coeff(row + m_startRow.value(), col + m_startCol.value()); } - inline Scalar& coeffRef(int index) + inline Scalar& coeffRef(Index index) { return m_matrix.const_cast_derived() .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); } - inline const Scalar coeff(int index) const + inline const Scalar coeff(Index index) const { return m_matrix .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), diff --git a/Eigen/src/SparseCore/TriangularSolver.h b/Eigen/src/SparseCore/TriangularSolver.h index f958ab300..dd55522a7 100644 --- a/Eigen/src/SparseCore/TriangularSolver.h +++ b/Eigen/src/SparseCore/TriangularSolver.h @@ -28,15 +28,16 @@ template struct sparse_solve_triangular_selector { typedef typename Rhs::Scalar Scalar; + typedef typename Lhs::Index Index; static void run(const Lhs& lhs, Rhs& other) { - for(int col=0 ; col struct sparse_solve_triangular_selector { typedef typename Rhs::Scalar Scalar; + typedef typename Lhs::Index Index; static void run(const Lhs& lhs, Rhs& other) { - for(int col=0 ; col=0 ; --i) + for(Index i=lhs.rows()-1 ; i>=0 ; --i) { Scalar tmp = other.coeff(i,col); Scalar l_ii = 0; @@ -100,11 +102,12 @@ template struct sparse_solve_triangular_selector { typedef typename Rhs::Scalar Scalar; + typedef typename Lhs::Index Index; static void run(const Lhs& lhs, Rhs& other) { - for(int col=0 ; col struct sparse_solve_triangular_selector { typedef typename Rhs::Scalar Scalar; + typedef typename Lhs::Index Index; static void run(const Lhs& lhs, Rhs& other) { - for(int col=0 ; col=0; --i) + for(Index i=lhs.cols()-1; i>=0; --i) { Scalar& tmp = other.coeffRef(i,col); if (tmp!=Scalar(0)) // optimization when other is actually sparse @@ -209,7 +213,7 @@ struct sparse_solve_triangular_sparse_selector { typedef typename Rhs::Scalar Scalar; typedef typename promote_index_type::Index, - typename traits::Index>::type Index; + typename traits::Index>::type Index; static void run(const Lhs& lhs, Rhs& other) { const bool IsLower = (UpLo==Lower); @@ -219,7 +223,7 @@ struct sparse_solve_triangular_sparse_selector Rhs res(other.rows(), other.cols()); res.reserve(other.nonZeros()); - for(int col=0 ; col tempVector.coeffRef(rhsIt.index()) = rhsIt.value(); } - for(int i=IsLower?0:lhs.cols()-1; + for(Index i=IsLower?0:lhs.cols()-1; IsLower?i=0; i+=IsLower?1:-1) { @@ -267,7 +271,7 @@ struct sparse_solve_triangular_sparse_selector } - int count = 0; + Index count = 0; // FIXME compute a reference value to filter zeros for (typename AmbiVector::Iterator it(tempVector/*,1e-12*/); it; ++it) { diff --git a/test/cholesky.cpp b/test/cholesky.cpp index 1d147bd7a..a883192ab 100644 --- a/test/cholesky.cpp +++ b/test/cholesky.cpp @@ -181,7 +181,7 @@ template void cholesky(const MatrixType& m) if(rows>=3) { SquareMatrixType A = symm; - int c = internal::random(0,rows-2); + Index c = internal::random(0,rows-2); A.bottomRightCorner(c,c).setZero(); // Make sure a solution exists: vecX.setRandom(); @@ -196,7 +196,7 @@ template void cholesky(const MatrixType& m) // check non-full rank matrices if(rows>=3) { - int r = internal::random(1,rows-1); + Index r = internal::random(1,rows-1); Matrix a = Matrix::Random(rows,r); SquareMatrixType A = a * a.adjoint(); // Make sure a solution exists: @@ -215,7 +215,7 @@ template void cholesky(const MatrixType& m) RealScalar s = (std::min)(16,std::numeric_limits::max_exponent10/8); Matrix a = Matrix::Random(rows,rows); Matrix d = Matrix::Random(rows); - for(int k=0; k(-s,s)); SquareMatrixType A = a * d.asDiagonal() * a.adjoint(); // Make sure a solution exists: diff --git a/test/mapstaticmethods.cpp b/test/mapstaticmethods.cpp index 5b512bde4..06272d106 100644 --- a/test/mapstaticmethods.cpp +++ b/test/mapstaticmethods.cpp @@ -69,7 +69,8 @@ struct mapstaticmethods_impl { static void run(const PlainObjectType& m) { - int rows = m.rows(), cols = m.cols(); + typedef typename PlainObjectType::Index Index; + Index rows = m.rows(), cols = m.cols(); int i = internal::random(2,5), j = internal::random(2,5); @@ -115,7 +116,8 @@ struct mapstaticmethods_impl { static void run(const PlainObjectType& v) { - int size = v.size(); + typedef typename PlainObjectType::Index Index; + Index size = v.size(); int i = internal::random(2,5); diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp index 258d238e2..3a9df618b 100644 --- a/test/product_notemporary.cpp +++ b/test/product_notemporary.cpp @@ -7,13 +7,12 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -static int nb_temporaries; +static long int nb_temporaries; -inline void on_temporary_creation(int size) { +inline void on_temporary_creation(long int size) { // here's a great place to set a breakpoint when debugging failures in this test! if(size!=0) nb_temporaries++; } - #define EIGEN_DENSE_STORAGE_CTOR_PLUGIN { on_temporary_creation(size); } diff --git a/test/ref.cpp b/test/ref.cpp index 19e81549c..d91e3b54c 100644 --- a/test/ref.cpp +++ b/test/ref.cpp @@ -12,13 +12,12 @@ #undef EIGEN_DEFAULT_TO_ROW_MAJOR #endif -static int nb_temporaries; +static long int nb_temporaries; -inline void on_temporary_creation(int) { +inline void on_temporary_creation(long int) { // here's a great place to set a breakpoint when debugging failures in this test! nb_temporaries++; } - #define EIGEN_DENSE_STORAGE_CTOR_PLUGIN { on_temporary_creation(size); } diff --git a/test/sparse.h b/test/sparse.h index e19a76316..81ab9e873 100644 --- a/test/sparse.h +++ b/test/sparse.h @@ -71,7 +71,7 @@ initSparse(double density, //sparseMat.startVec(j); for(Index i=0; i(0,1) < density) ? internal::random() : Scalar(0); @@ -163,7 +163,7 @@ initSparse(double density, { sparseVec.reserve(int(refVec.size()*density)); sparseVec.setZero(); - for(Index i=0; i(0,1) < density) ? internal::random() : Scalar(0); if (v!=Scalar(0)) diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 6c620f037..4c9b9111e 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -147,7 +147,7 @@ template void sparse_basic(const SparseMatrixType& re DenseMatrix m1(rows,cols); m1.setZero(); SparseMatrixType m2(rows,cols); - VectorXi r(VectorXi::Constant(m2.outerSize(), ((mode%2)==0) ? m2.innerSize() : std::max(1,m2.innerSize()/8))); + VectorXi r(VectorXi::Constant(m2.outerSize(), ((mode%2)==0) ? int(m2.innerSize()) : std::max(1,int(m2.innerSize())/8))); m2.reserve(r); for (int k=0; k void sparse_basic(const SparseMatrixType& re VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1)); SparseMatrixType m3(rows,rows); - m3.reserve(VectorXi::Constant(rows,rows/2)); + m3.reserve(VectorXi::Constant(rows,int(rows/2))); for(Index j=0; j void sparse_basic(const SparseMatrixType& re { typedef Triplet TripletType; std::vector triplets; - int ntriplets = rows*cols; + Index ntriplets = rows*cols; triplets.reserve(ntriplets); DenseMatrix refMat(rows,cols); refMat.setZero(); - for(int i=0;i(0,rows-1); Index c = internal::random(0,cols-1); From 5c4733f6e41dc09a1d700c780c8a3492906e127a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Jul 2014 18:38:34 +0200 Subject: [PATCH 19/31] Fix bug #809: unused variable warning --- Eigen/src/OrderingMethods/Ordering.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 4e0609784..f3c31f9cb 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -136,12 +136,12 @@ class COLAMDOrdering Index stats [COLAMD_STATS]; internal::colamd_set_defaults(knobs); - Index info; IndexVector p(n+1), A(Alen); for(Index i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i]; for(Index i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i]; // Call Colamd routine to compute the ordering - info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats); + Index info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats); + EIGEN_UNUSED_VARIABLE(info); eigen_assert( info && "COLAMD failed " ); perm.resize(n); From 5214466b7a38ec7684ea685d5d9c56ae2cae678e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Jul 2014 19:01:49 +0200 Subject: [PATCH 20/31] Fix implicit long to int conversions in blas interface --- blas/level1_impl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blas/level1_impl.h b/blas/level1_impl.h index 98e4c0963..e623bd178 100644 --- a/blas/level1_impl.h +++ b/blas/level1_impl.h @@ -59,7 +59,7 @@ int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amax_)(int *n, RealScalar *px, int *inc DenseIndex ret; if(*incx==1) make_vector(x,*n).cwiseAbs().maxCoeff(&ret); else make_vector(x,*n,std::abs(*incx)).cwiseAbs().maxCoeff(&ret); - return ret+1; + return int(ret)+1; } int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amin_)(int *n, RealScalar *px, int *incx) @@ -70,7 +70,7 @@ int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amin_)(int *n, RealScalar *px, int *inc DenseIndex ret; if(*incx==1) make_vector(x,*n).cwiseAbs().minCoeff(&ret); else make_vector(x,*n,std::abs(*incx)).cwiseAbs().minCoeff(&ret); - return ret+1; + return int(ret)+1; } int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps) From e3557e8dd292e71e033677926cc3ab749d2696b3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Jul 2014 18:58:41 +0200 Subject: [PATCH 21/31] bug #808: use double instead of float for the increasing size ratio in CompressedStorage::resize (grafted from 0e0ae400843cd9a459e5306d4d6560dbea759a42 ) --- Eigen/src/SparseCore/CompressedStorage.h | 4 ++-- Eigen/src/SparseCore/SparseMatrix.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h index 10d516a66..a667cb56e 100644 --- a/Eigen/src/SparseCore/CompressedStorage.h +++ b/Eigen/src/SparseCore/CompressedStorage.h @@ -83,10 +83,10 @@ class CompressedStorage reallocate(m_size); } - void resize(size_t size, float reserveSizeFactor = 0) + void resize(size_t size, double reserveSizeFactor = 0) { if (m_allocatedSize::Scalar& Sparse size_t p = m_outerIndex[outer+1]; ++m_outerIndex[outer+1]; - float reallocRatio = 1; + double reallocRatio = 1; if (m_data.allocatedSize()<=m_data.size()) { // if there is no preallocated memory, let's reserve a minimum of 32 elements @@ -1190,13 +1190,13 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse { // we need to reallocate the data, to reduce multiple reallocations // we use a smart resize algorithm based on the current filling ratio - // in addition, we use float to avoid integers overflows - float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1); - reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size()); + // in addition, we use double to avoid integers overflows + double nnzEstimate = double(m_outerIndex[outer])*double(m_outerSize)/double(outer+1); + reallocRatio = (nnzEstimate-double(m_data.size()))/double(m_data.size()); // furthermore we bound the realloc ratio to: // 1) reduce multiple minor realloc when the matrix is almost filled // 2) avoid to allocate too much memory when the matrix is almost empty - reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f); + reallocRatio = (std::min)((std::max)(reallocRatio,1.5),8.); } } m_data.resize(m_data.size()+1,reallocRatio); From 77d57cd6813d4326543c68f7fd64901402e05d17 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Jul 2014 19:07:58 +0200 Subject: [PATCH 22/31] bug #808: fix implicit conversions from int/longint to float/double --- Eigen/src/Core/functors/NullaryFunctors.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 950acd93b..be03fbf52 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -92,7 +92,7 @@ struct linspaced_op_impl template EIGEN_STRONG_INLINE const Packet packetOp(Index i) const - { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(i),m_interPacket))); } + { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(Scalar(i)),m_interPacket))); } const Scalar m_low; const Scalar m_step; @@ -112,7 +112,7 @@ template struct functor_traits< linspaced_o template struct linspaced_op { typedef typename packet_traits::type Packet; - linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {} + linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {} template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } From 1967e7f2f37b0b2c17cb4ead26f43915d342c663 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Wed, 9 Jul 2014 03:32:32 +0800 Subject: [PATCH 23/31] Fix bug #839 --- Eigen/src/Core/TriangularMatrix.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index cdd341965..72792d21b 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -323,22 +323,22 @@ template class TriangularView /** Efficient triangular matrix times vector/matrix product */ template EIGEN_DEVICE_FUNC - TriangularProduct + TriangularProduct operator*(const MatrixBase& rhs) const { return TriangularProduct - + (m_matrix, rhs.derived()); } /** Efficient vector/matrix times triangular matrix product */ template friend EIGEN_DEVICE_FUNC - TriangularProduct + TriangularProduct operator*(const MatrixBase& lhs, const TriangularView& rhs) { return TriangularProduct - + (lhs.derived(),rhs.m_matrix); } From 54fbbe7b4ec085260b6ad79a9eb912416482516d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 9 Jul 2014 13:06:06 +0200 Subject: [PATCH 24/31] Add unit test for bug #839. --- test/triangular.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/triangular.cpp b/test/triangular.cpp index 54320390b..936c2aef3 100644 --- a/test/triangular.cpp +++ b/test/triangular.cpp @@ -113,6 +113,13 @@ template void triangular_square(const MatrixType& m) m3.setZero(); m3.template triangularView().setOnes(); VERIFY_IS_APPROX(m2,m3); + + m1.setRandom(); + m3 = m1.template triangularView(); + Matrix m5(cols, internal::random(1,20)); m5.setRandom(); + Matrix m6(internal::random(1,20), rows); m6.setRandom(); + VERIFY_IS_APPROX(m1.template triangularView() * m5, m3*m5); + VERIFY_IS_APPROX(m6*m1.template triangularView(), m6*m3); } From 62f948c56a782689947e4bbdcaab2b5cb0247ac5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 9 Jul 2014 16:01:24 +0200 Subject: [PATCH 25/31] Generalize unit testing of pscatter --- test/packetmath.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/packetmath.cpp b/test/packetmath.cpp index a51d31dbd..e5dc473c2 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -408,14 +408,17 @@ template void packetmath_scatter_gather() { for (int i=0; i()/RealScalar(PacketSize); } - EIGEN_ALIGN_DEFAULT Scalar buffer[PacketSize*11]; - memset(buffer, 0, 11*sizeof(Packet)); + + int stride = internal::random(1,20); + + EIGEN_ALIGN_DEFAULT Scalar buffer[PacketSize*20]; + memset(buffer, 0, 20*sizeof(Packet)); Packet packet = internal::pload(data1); - internal::pscatter(buffer, packet, 11); + internal::pscatter(buffer, packet, stride); - for (int i = 0; i < PacketSize*11; ++i) { - if ((i%11) == 0) { - VERIFY(isApproxAbs(buffer[i], data1[i/11], refvalue) && "pscatter"); + for (int i = 0; i < PacketSize*20; ++i) { + if ((i%stride) == 0 && i Date: Wed, 9 Jul 2014 16:54:15 +0200 Subject: [PATCH 26/31] Determine version of Metis library. Apparently, at least version 5.x is needed for Eigen/MetisSupport. Marked some internal variables as advanced --- cmake/FindCholmod.cmake | 2 +- cmake/FindFFTW.cmake | 2 +- cmake/FindMetis.cmake | 38 ++++++++++++++++++++++++++++++++++++-- test/CMakeLists.txt | 3 ++- 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/cmake/FindCholmod.cmake b/cmake/FindCholmod.cmake index 7b3046d45..23239c300 100644 --- a/cmake/FindCholmod.cmake +++ b/cmake/FindCholmod.cmake @@ -86,4 +86,4 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(CHOLMOD DEFAULT_MSG CHOLMOD_INCLUDES CHOLMOD_LIBRARIES) -mark_as_advanced(CHOLMOD_INCLUDES CHOLMOD_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY SUITESPARSE_LIBRARY) +mark_as_advanced(CHOLMOD_INCLUDES CHOLMOD_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY SUITESPARSE_LIBRARY CAMD_LIBRARY CCOLAMD_LIBRARY CHOLMOD_METIS_LIBRARY) diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake index a9e9925e7..6c4dc9ab4 100644 --- a/cmake/FindFFTW.cmake +++ b/cmake/FindFFTW.cmake @@ -115,5 +115,5 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(FFTW DEFAULT_MSG FFTW_INCLUDES FFTW_LIBRARIES) -mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES) +mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES FFTW_LIB FFTWF_LIB FFTWL_LIB) diff --git a/cmake/FindMetis.cmake b/cmake/FindMetis.cmake index 627c3e9ae..e0040d320 100644 --- a/cmake/FindMetis.cmake +++ b/cmake/FindMetis.cmake @@ -10,16 +10,50 @@ find_path(METIS_INCLUDES PATHS $ENV{METISDIR} ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES + PATH_SUFFIXES + . metis include ) +macro(_metis_check_version) + file(READ "${METIS_INCLUDES}/metis.h" _metis_version_header) + + string(REGEX MATCH "define[ \t]+METIS_VER_MAJOR[ \t]+([0-9]+)" _metis_major_version_match "${_metis_version_header}") + set(METIS_MAJOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+METIS_VER_MINOR[ \t]+([0-9]+)" _metis_minor_version_match "${_metis_version_header}") + set(METIS_MINOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+METIS_VER_SUBMINOR[ \t]+([0-9]+)" _metis_subminor_version_match "${_metis_version_header}") + set(METIS_SUBMINOR_VERSION "${CMAKE_MATCH_1}") + if(NOT METIS_MAJOR_VERSION) + message(WARNING "Could not determine Metis version. Assuming version 4.0.0") + set(METIS_VERSION 4.0.0) + else() + set(METIS_VERSION ${METIS_MAJOR_VERSION}.${METIS_MINOR_VERSION}.${METIS_SUBMINOR_VERSION}) + endif() + if(${METIS_VERSION} VERSION_LESS ${Metis_FIND_VERSION}) + set(METIS_VERSION_OK FALSE) + else() + set(METIS_VERSION_OK TRUE) + endif() + + if(NOT METIS_VERSION_OK) + message(STATUS "Metis version ${METIS_VERSION} found in ${METIS_INCLUDES}, " + "but at least version ${Metis_FIND_VERSION} is required") + endif(NOT METIS_VERSION_OK) +endmacro(_metis_check_version) + + if(METIS_INCLUDES AND Metis_FIND_VERSION) + _metis_check_version() + else() + set(METIS_VERSION_OK TRUE) + endif() + find_library(METIS_LIBRARIES metis PATHS $ENV{METISDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(METIS DEFAULT_MSG - METIS_INCLUDES METIS_LIBRARIES) + METIS_INCLUDES METIS_LIBRARIES METIS_VERSION_OK) mark_as_advanced(METIS_INCLUDES METIS_LIBRARIES) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 62c9c78a6..4521d07e4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -82,7 +82,7 @@ endif() find_package(Pastix) find_package(Scotch) -find_package(Metis) +find_package(Metis 5.0 REQUIRED) if(PASTIX_FOUND) add_definitions("-DEIGEN_PASTIX_SUPPORT") include_directories(${PASTIX_INCLUDES}) @@ -299,6 +299,7 @@ ei_add_property(EIGEN_TESTING_SUMMARY "CXX_FLAGS: ${CMAKE_CXX_FLAGS}\n") ei_add_property(EIGEN_TESTING_SUMMARY "Sparse lib flags: ${SPARSE_LIBS}\n") option(EIGEN_TEST_EIGEN2 "Run whole Eigen2 test suite against EIGEN2_SUPPORT" OFF) +mark_as_advanced(EIGEN_TEST_EIGEN2) if(EIGEN_TEST_EIGEN2) message(WARNING "The Eigen2 test suite has been removed") endif() From 23bb592a2d0de937c68fc1c819642bbcfa2c1e4f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 9 Jul 2014 17:21:16 +0200 Subject: [PATCH 27/31] Fix unit test when using 80bits FPU --- test/block.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/block.cpp b/test/block.cpp index 9ed5d7bc5..269acd28e 100644 --- a/test/block.cpp +++ b/test/block.cpp @@ -141,11 +141,11 @@ template void block(const MatrixType& m) VERIFY_IS_EQUAL( (m1.transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , (m1.row(r1).segment(c1,c2-c1+1)).transpose() ); // expressions without direct access - VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2)) , ((m1+m2).block(r2,c2,rows-r2,cols-c2)) ); - VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).row(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) ); - VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).col(0)) , ((m1+m2).col(c1).segment(r1,r2-r1+1)) ); - VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); - VERIFY_IS_EQUAL( ((m1+m2).transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2)) , ((m1+m2).block(r2,c2,rows-r2,cols-c2)) ); + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).row(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) ); + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).col(0)) , ((m1+m2).col(c1).segment(r1,r2-r1+1)) ); + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); + VERIFY_IS_APPROX( ((m1+m2).transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); // evaluation into plain matrices from expressions with direct access (stress MapBase) DynamicMatrixType dm; From e955725ff1434396cc41beda6f5989bef0940704 Mon Sep 17 00:00:00 2001 From: Kolja Brix Date: Thu, 10 Jul 2014 08:20:55 +0200 Subject: [PATCH 28/31] Fix GMRES: Initialize essential Householder vector with correct dimension. Add check if initial guess is already a sufficient approximation. --- unsupported/Eigen/src/IterativeSolvers/GMRES.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/unsupported/Eigen/src/IterativeSolvers/GMRES.h index 073367506..c8c84069e 100644 --- a/unsupported/Eigen/src/IterativeSolvers/GMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/GMRES.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2011 Gael Guennebaud -// Copyright (C) 2012 Kolja Brix +// Copyright (C) 2012, 2014 Kolja Brix // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -72,16 +72,20 @@ bool gmres(const MatrixType & mat, const Rhs & rhs, Dest & x, const Precondition VectorType p0 = rhs - mat*x; VectorType r0 = precond.solve(p0); -// RealScalar r0_sqnorm = r0.squaredNorm(); + + // is initial guess already good enough? + if(abs(r0.norm()) < tol) { + return true; + } VectorType w = VectorType::Zero(restart + 1); - FMatrixType H = FMatrixType::Zero(m, restart + 1); + FMatrixType H = FMatrixType::Zero(m, restart + 1); // Hessenberg matrix VectorType tau = VectorType::Zero(restart + 1); std::vector < JacobiRotation < Scalar > > G(restart); // generate first Householder vector - VectorType e; + VectorType e(m-1); RealScalar beta; r0.makeHouseholder(e, tau.coeffRef(0), beta); w(0)=(Scalar) beta; From f27f55bee3efc2cafd01cb07d3faadf7eb490f66 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Thu, 10 Jul 2014 14:59:18 +0200 Subject: [PATCH 29/31] Make MatrixBase::makeHouseholder resize its output vector if it is zero --- Eigen/src/Householder/Householder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Householder/Householder.h b/Eigen/src/Householder/Householder.h index 32112af9b..9d33e1e62 100644 --- a/Eigen/src/Householder/Householder.h +++ b/Eigen/src/Householder/Householder.h @@ -80,7 +80,7 @@ void MatrixBase::makeHouseholder( { tau = RealScalar(0); beta = numext::real(c0); - essential.setZero(); + essential.setZero(size()-1); } else { From cf7cf7b4907859381e823d99588de94d5e31bd72 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Thu, 10 Jul 2014 16:12:13 +0200 Subject: [PATCH 30/31] Backed out of changeset 6089:f27f55bee3efc2cafd01cb07d3faadf7eb490f66 Unfortunately this breaks things at other places --- Eigen/src/Householder/Householder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Householder/Householder.h b/Eigen/src/Householder/Householder.h index 9d33e1e62..32112af9b 100644 --- a/Eigen/src/Householder/Householder.h +++ b/Eigen/src/Householder/Householder.h @@ -80,7 +80,7 @@ void MatrixBase::makeHouseholder( { tau = RealScalar(0); beta = numext::real(c0); - essential.setZero(size()-1); + essential.setZero(); } else { From d1460d92782ce0f7b90a8a52d44d50b44b167f98 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Thu, 10 Jul 2014 16:23:20 +0200 Subject: [PATCH 31/31] stride must be DenseIndex not int --- Eigen/src/Core/arch/AVX/Complex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index b3cf7bb26..aa5aa1e34 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -310,13 +310,13 @@ template<> EIGEN_STRONG_INLINE Packet2cd ploaddup(const std::complex< template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather, Packet2cd>(const std::complex* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather, Packet2cd>(const std::complex* from, DenseIndex stride) { return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]), std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cd>(std::complex* to, const Packet2cd& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cd>(std::complex* to, const Packet2cd& from, DenseIndex stride) { __m128d low = _mm256_extractf128_pd(from.v, 0); to[stride*0] = std::complex(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));