Compare commits

..

2 Commits

Author SHA1 Message Date
Gael Guennebaud
72ffb63165 fix compilation for old but not so old versions of glew 2011-03-18 10:26:21 +01:00
Benoit Jacob
67e24b85a4 bump 2011-03-18 05:13:34 -04:00
467 changed files with 16932 additions and 31612 deletions

11
.hgeol
View File

@@ -1,8 +1,3 @@
[patterns]
scripts/*.in = LF
debug/msvc/*.dat = CRLF
unsupported/test/mpreal/*.* = CRLF
** = native
[repository]
native = LF
[patterns]
**.* = native
eigen_autoexp_part.dat = CRLF

View File

@@ -64,10 +64,6 @@ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
find_package(StandardMathLibrary)
set(EIGEN_TEST_CUSTOM_LINKER_FLAGS "" CACHE STRING "Additional linker flags when linking unit tests.")
set(EIGEN_TEST_CUSTOM_CXX_FLAGS "" CACHE STRING "Additional compiler flags when compiling unit tests.")
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "")
if(NOT STANDARD_MATH_LIBRARY_FOUND)
@@ -107,8 +103,6 @@ endif()
add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
@@ -285,21 +279,9 @@ install(FILES
)
if(EIGEN_BUILD_PKGCONFIG)
SET(path_separator ":")
STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}")
message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib")
FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search})
if(pkg_config_libdir)
SET(pkg_config_install_dir ${pkg_config_libdir})
message(STATUS "found ${pkg_config_libdir}/pkgconfig" )
else(pkg_config_libdir)
SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share)
message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" )
endif(pkg_config_libdir)
configure_file(eigen3.pc.in eigen3.pc)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
DESTINATION ${pkg_config_install_dir}/pkgconfig
DESTINATION share/pkgconfig
)
endif(EIGEN_BUILD_PKGCONFIG)
@@ -307,9 +289,44 @@ add_subdirectory(Eigen)
add_subdirectory(doc EXCLUDE_FROM_ALL)
include(EigenConfigureTesting)
# fixme, not sure this line is still needed:
add_custom_target(buildtests)
add_custom_target(check COMMAND "ctest")
add_dependencies(check buildtests)
# CMake/Ctest does not allow us to change the build command,
# so we have to workaround by directly editing the generated DartConfiguration.tcl file
# save CMAKE_MAKE_PROGRAM
set(CMAKE_MAKE_PROGRAM_SAVE ${CMAKE_MAKE_PROGRAM})
# and set a fake one
set(CMAKE_MAKE_PROGRAM "@EIGEN_MAKECOMMAND_PLACEHOLDER@")
include(CTest)
enable_testing() # must be called from the root CMakeLists, see man page
include(EigenTesting)
ei_init_testing()
# overwrite default DartConfiguration.tcl
# The workarounds are different for each version of the MSVC IDE
# Choose the command line that will be substituted for the
# @EIGEN_MAKECOMMAND_PLACEHOLDER@ token when DartConfiguration.tcl is
# re-configured. The command always builds the "buildtests" target with the
# real make program saved in CMAKE_MAKE_PROGRAM_SAVE.
if(NOT MSVC_IDE)
  # make and nmake: the target name is passed as a plain argument
  set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests")
elseif(MSVC_VERSION EQUAL 1600)
  # MSVC 2010: build the generated project file for the configuration chosen by ctest.
  # NOTE(review): the trailing "\n # " presumably comments out whatever follows
  # the placeholder in the generated file -- confirm against DartConfiguration.tcl.
  set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests.vcxproj /p:Configuration=\${CTEST_CONFIGURATION_TYPE} \n # ")
else()
  # MSVC 2008 (TODO check MSVC 2005)
  set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} /project buildtests")
endif()
configure_file(${CMAKE_BINARY_DIR}/DartConfiguration.tcl ${CMAKE_BINARY_DIR}/DartConfiguration.tcl)
# restore default CMAKE_MAKE_PROGRAM
set(CMAKE_MAKE_PROGRAM ${CMAKE_MAKE_PROGRAM_SAVE})
# un-set temporary variables so that it is like they never existed.
# CMake 2.6.3 introduces the more logical unset() syntax for this.
set(CMAKE_MAKE_PROGRAM_SAVE)
set(EIGEN_MAKECOMMAND_PLACEHOLDER)
configure_file(${CMAKE_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
@@ -318,13 +335,15 @@ else()
add_subdirectory(test EXCLUDE_FROM_ALL)
endif()
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(blas)
add_subdirectory(lapack)
else()
add_subdirectory(blas EXCLUDE_FROM_ALL)
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
if(NOT MSVC)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(blas)
add_subdirectory(lapack)
else()
add_subdirectory(blas EXCLUDE_FROM_ALL)
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
endif(NOT MSVC)
add_subdirectory(unsupported)

View File

@@ -1,26 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

View File

@@ -24,9 +24,6 @@ namespace Eigen {
#include "src/misc/Solve.h"
#include "src/Cholesky/LLT.h"
#include "src/Cholesky/LDLT.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/Cholesky/LLT_MKL.h"
#endif
} // namespace Eigen

View File

@@ -1,34 +0,0 @@
#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
#define EIGEN_CHOLMODSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
extern "C" {
#include <cholmod.h>
}
namespace Eigen {
/** \ingroup Support_modules
* \defgroup CholmodSupport_Module CholmodSupport module
*
*
* \code
* #include <Eigen/CholmodSupport>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/CholmodSupport/CholmodSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_CHOLMODSUPPORT_MODULE_H

View File

@@ -34,10 +34,6 @@
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
#include "src/Core/util/Macros.h"
// this include file manages BLAS and MKL related macros
// and inclusion of their respective header files
#include "src/Core/util/MKL_support.h"
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
#if !EIGEN_ALIGN
@@ -55,16 +51,16 @@
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
#endif
#endif
#else
// Remember that usage of defined() in a #define is undefined by the standard
#if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
#endif
#endif
// Remember that usage of defined() in a #define is undefined by the standard
#if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_BUT_NOT_OLD_GCC
#endif
#ifndef EIGEN_DONT_VECTORIZE
#if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
#if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
// Defines symbols for compile-time detection of which instructions are
// used.
@@ -147,7 +143,6 @@
#ifdef EIGEN_HAS_ERRNO
#include <cerrno>
#endif
#include <cstddef>
#include <cstdlib>
#include <cmath>
#include <complex>
@@ -171,7 +166,7 @@
#include <intrin.h>
#endif
#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
#define EIGEN_EXCEPTIONS
#endif
@@ -179,7 +174,16 @@
#include <new>
#endif
/** \brief Namespace containing all symbols from the %Eigen library. */
// this needs to be done after all possible windows C header includes and before any Eigen source includes
// (system C++ includes are supposed to be able to deal with this already):
// windows.h defines min and max macros which would make Eigen fail to compile.
#if defined(min) || defined(max)
#error The preprocessor symbols 'min' or 'max' are defined. If you are compiling on Windows, do #define NOMINMAX to prevent windows.h from defining these symbols.
#endif
// defined in bits/termios.h
#undef B0
namespace Eigen {
inline static const char *SimdInstructionSetsInUse(void) {
@@ -235,8 +239,6 @@ inline static const char *SimdInstructionSetsInUse(void) {
// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
// ensure QNX/QCC support
using std::size_t;
// gcc 4.6.0 wants std:: for ptrdiff_t
using std::ptrdiff_t;
/** \defgroup Core_Module Core module
* This is the main module of Eigen providing dense matrix and vector support
@@ -248,10 +250,6 @@ using std::ptrdiff_t;
* \endcode
*/
/** \defgroup Support_modules Support modules [category]
* Category of modules which add support for external libraries.
*/
#include "src/Core/util/Constants.h"
#include "src/Core/util/ForwardDeclarations.h"
#include "src/Core/util/Meta.h"
@@ -323,7 +321,7 @@ using std::ptrdiff_t;
#include "src/Core/CommaInitializer.h"
#include "src/Core/Flagged.h"
#include "src/Core/ProductBase.h"
#include "src/Core/GeneralProduct.h"
#include "src/Core/Product.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/SolveTriangular.h"
@@ -352,21 +350,6 @@ using std::ptrdiff_t;
#include "src/Core/ArrayBase.h"
#include "src/Core/ArrayWrapper.h"
#ifdef EIGEN_USE_BLAS
#include "src/Core/products/GeneralMatrixMatrix_MKL.h"
#include "src/Core/products/GeneralMatrixVector_MKL.h"
#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h"
#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h"
#include "src/Core/products/SelfadjointMatrixVector_MKL.h"
#include "src/Core/products/TriangularMatrixMatrix_MKL.h"
#include "src/Core/products/TriangularMatrixVector_MKL.h"
#include "src/Core/products/TriangularSolverMatrix_MKL.h"
#endif // EIGEN_USE_BLAS
#ifdef EIGEN_USE_MKL_VML
#include "src/Core/Assign_MKL.h"
#endif
} // namespace Eigen
#include "src/Core/GlobalFunctions.h"

View File

@@ -33,8 +33,7 @@
namespace Eigen {
/** \ingroup Support_modules
* \defgroup Eigen2Support_Module Eigen2 support module
/** \defgroup Eigen2Support_Module Eigen2 support module
* This module provides a couple of deprecated functions improving the compatibility with Eigen2.
*
* To use it, define EIGEN2_SUPPORT before including any Eigen header
@@ -64,24 +63,6 @@ namespace Eigen {
// Eigen2 used to include iostream
#include<iostream>
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
using Eigen::Vector##SizeSuffix##TypeSuffix; \
using Eigen::RowVector##SizeSuffix##TypeSuffix;
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
#define EIGEN_USING_MATRIX_TYPEDEFS \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
#define USING_PART_OF_NAMESPACE_EIGEN \
EIGEN_USING_MATRIX_TYPEDEFS \
using Eigen::Matrix; \

View File

@@ -9,7 +9,6 @@
#include "Jacobi"
#include "Householder"
#include "LU"
#include "Geometry"
namespace Eigen {
@@ -36,11 +35,6 @@ namespace Eigen {
#include "src/Eigenvalues/ComplexSchur.h"
#include "src/Eigenvalues/ComplexEigenSolver.h"
#include "src/Eigenvalues/MatrixBaseEigenvalues.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/Eigenvalues/RealSchur_MKL.h"
#include "src/Eigenvalues/ComplexSchur_MKL.h"
#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h"
#endif
} // namespace Eigen

View File

@@ -1,37 +0,0 @@
#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module
*
* This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a square matrix, usually very large and sparse.
* Those solvers are accessible via the following classes:
* - ConjugateGradient for selfadjoint (hermitian) matrices,
* - BiCGSTAB for general square matrices.
*
* Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
*
* \code
* #include <Eigen/IterativeLinearSolvers>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/IterativeLinearSolvers/IterativeSolverBase.h"
#include "src/IterativeLinearSolvers/BasicPreconditioners.h"
#include "src/IterativeLinearSolvers/ConjugateGradient.h"
#include "src/IterativeLinearSolvers/BiCGSTAB.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H

View File

@@ -23,9 +23,6 @@ namespace Eigen {
#include "src/misc/Image.h"
#include "src/LU/FullPivLU.h"
#include "src/LU/PartialPivLU.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/LU/PartialPivLU_MKL.h"
#endif
#include "src/LU/Determinant.h"
#include "src/LU/Inverse.h"

View File

@@ -1,27 +0,0 @@
#ifndef EIGEN_ORDERINGMETHODS_MODULE_H
#define EIGEN_ORDERINGMETHODS_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup OrderingMethods_Module OrderingMethods module
*
* This module is currently for internal use only.
*
*
* \code
* #include <Eigen/OrderingMethods>
* \endcode
*/
#include "src/OrderingMethods/Amd.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_ORDERINGMETHODS_MODULE_H

View File

@@ -1,30 +0,0 @@
#ifndef EIGEN_PARDISOSUPPORT_MODULE_H
#define EIGEN_PARDISOSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
#include <mkl_pardiso.h>
#include <unsupported/Eigen/SparseExtra>
namespace Eigen {
/** \ingroup Support_modules
* \defgroup PARDISOSupport_Module PARDISOSupport module
*
* This module brings support for the Intel(R) MKL PARDISO direct sparse solvers
*
* \code
* #include <Eigen/PARDISOSupport>
* \endcode
*/
#include "src/PARDISOSupport/PARDISOSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_PARDISOSUPPORT_MODULE_H

View File

@@ -28,10 +28,6 @@ namespace Eigen {
#include "src/QR/HouseholderQR.h"
#include "src/QR/FullPivHouseholderQR.h"
#include "src/QR/ColPivHouseholderQR.h"
#ifdef EIGEN_USE_LAPACKE
#include "src/QR/HouseholderQR_MKL.h"
#include "src/QR/ColPivHouseholderQR_MKL.h"
#endif
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/QR.h"

View File

@@ -13,9 +13,9 @@ namespace Eigen {
*
*
*
* This module provides SVD decomposition for matrices (both real and complex).
* This module provides SVD decomposition for (currently) real matrices.
* This decomposition is accessible via the following MatrixBase method:
* - MatrixBase::jacobiSvd()
* - MatrixBase::svd()
*
* \code
* #include <Eigen/SVD>
@@ -24,9 +24,6 @@ namespace Eigen {
#include "src/misc/Solve.h"
#include "src/SVD/JacobiSVD.h"
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
#include "src/SVD/JacobiSVD_MKL.h"
#endif
#include "src/SVD/UpperBidiagonalization.h"
#ifdef EIGEN2_SUPPORT

View File

@@ -1,27 +1,69 @@
#ifndef EIGEN_SPARSE_MODULE_H
#define EIGEN_SPARSE_MODULE_H
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include <vector>
#include <map>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#ifdef EIGEN2_SUPPORT
#define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
#endif
#ifndef EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
#error The sparse module API is not stable yet. To use it anyway, please define the EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET preprocessor token.
#endif
namespace Eigen {
/** \defgroup Sparse_modules Sparse modules
/** \defgroup Sparse_Module Sparse module
*
* Meta-module including all related modules:
* - SparseCore
* - OrderingMethods
* - SparseCholesky
* - IterativeLinearSolvers
*
*
* See the \ref TutorialSparse "Sparse tutorial"
*
* \code
* #include <Eigen/Sparse>
* \endcode
*/
/** The type used to identify a general sparse storage. */
struct Sparse {};
#include "src/Sparse/SparseUtil.h"
#include "src/Sparse/SparseMatrixBase.h"
#include "src/Sparse/CompressedStorage.h"
#include "src/Sparse/AmbiVector.h"
#include "src/Sparse/SparseMatrix.h"
#include "src/Sparse/DynamicSparseMatrix.h"
#include "src/Sparse/MappedSparseMatrix.h"
#include "src/Sparse/SparseVector.h"
#include "src/Sparse/CoreIterators.h"
#include "src/Sparse/SparseBlock.h"
#include "src/Sparse/SparseTranspose.h"
#include "src/Sparse/SparseCwiseUnaryOp.h"
#include "src/Sparse/SparseCwiseBinaryOp.h"
#include "src/Sparse/SparseDot.h"
#include "src/Sparse/SparseAssign.h"
#include "src/Sparse/SparseRedux.h"
#include "src/Sparse/SparseFuzzy.h"
#include "src/Sparse/SparseProduct.h"
#include "src/Sparse/SparseSparseProduct.h"
#include "src/Sparse/SparseDenseProduct.h"
#include "src/Sparse/SparseDiagonalProduct.h"
#include "src/Sparse/SparseTriangularView.h"
#include "src/Sparse/SparseSelfAdjointView.h"
#include "src/Sparse/TriangularSolver.h"
#include "src/Sparse/SparseView.h"
} // namespace Eigen
#include "SparseCore"
#include "OrderingMethods"
#include "SparseCholesky"
#include "IterativeLinearSolvers"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSE_MODULE_H

View File

@@ -1,34 +0,0 @@
#ifndef EIGEN_SPARSECHOLESKY_MODULE_H
#define EIGEN_SPARSECHOLESKY_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup SparseCholesky_Module SparseCholesky module
*
* This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices.
* Those decompositions are accessible via the following classes:
* - SimplicialLLt,
* - SimplicialLDLt
*
* Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module.
*
* \code
* #include <Eigen/SparseCholesky>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/SparseCholesky/SimplicialCholesky.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSECHOLESKY_MODULE_H

View File

@@ -1,65 +0,0 @@
#ifndef EIGEN_SPARSECORE_MODULE_H
#define EIGEN_SPARSECORE_MODULE_H
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include <vector>
#include <map>
#include <cstdlib>
#include <cstring>
#include <algorithm>
namespace Eigen {
/** \ingroup Sparse_modules
* \defgroup SparseCore_Module SparseCore module
*
* This module provides a sparse matrix representation, and basic associated matrix manipulations
* and operations.
*
* See the \ref TutorialSparse "Sparse tutorial"
*
* \code
* #include <Eigen/SparseCore>
* \endcode
*
* This module depends on: Core.
*/
/** The type used to identify a general sparse storage. */
struct Sparse {};
#include "src/SparseCore/SparseUtil.h"
#include "src/SparseCore/SparseMatrixBase.h"
#include "src/SparseCore/CompressedStorage.h"
#include "src/SparseCore/AmbiVector.h"
#include "src/SparseCore/SparseMatrix.h"
#include "src/SparseCore/MappedSparseMatrix.h"
#include "src/SparseCore/SparseVector.h"
#include "src/SparseCore/CoreIterators.h"
#include "src/SparseCore/SparseBlock.h"
#include "src/SparseCore/SparseTranspose.h"
#include "src/SparseCore/SparseCwiseUnaryOp.h"
#include "src/SparseCore/SparseCwiseBinaryOp.h"
#include "src/SparseCore/SparseDot.h"
#include "src/SparseCore/SparseAssign.h"
#include "src/SparseCore/SparseRedux.h"
#include "src/SparseCore/SparseFuzzy.h"
#include "src/SparseCore/ConservativeSparseSparseProduct.h"
#include "src/SparseCore/SparseSparseProductWithPruning.h"
#include "src/SparseCore/SparseProduct.h"
#include "src/SparseCore/SparseDenseProduct.h"
#include "src/SparseCore/SparseDiagonalProduct.h"
#include "src/SparseCore/SparseTriangularView.h"
#include "src/SparseCore/SparseSelfAdjointView.h"
#include "src/SparseCore/TriangularSolver.h"
#include "src/SparseCore/SparseView.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSECORE_MODULE_H

View File

@@ -1,53 +0,0 @@
#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H
#define EIGEN_SUPERLUSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
#ifdef EMPTY
#define EIGEN_EMPTY_WAS_ALREADY_DEFINED
#endif
typedef int int_t;
#include <slu_Cnames.h>
#include <supermatrix.h>
#include <slu_util.h>
// slu_util.h defines a preprocessor token named EMPTY which is really polluting,
// so we remove it in favor of a SUPERLU_EMPTY token.
// If EMPTY was already defined, then we don't undef it.
#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED)
# undef EIGEN_EMPTY_WAS_ALREADY_DEFINED
#elif defined(EMPTY)
# undef EMPTY
#endif
#define SUPERLU_EMPTY (-1)
namespace Eigen { struct SluMatrix; }
namespace Eigen {
/** \ingroup Support_modules
* \defgroup SuperLUSupport_Module SuperLUSupport module
*
* \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
*
* \code
* #include <Eigen/SuperLUSupport>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/SuperLUSupport/SuperLUSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SUPERLUSUPPORT_MODULE_H

View File

@@ -1,34 +0,0 @@
#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H
#define EIGEN_UMFPACKSUPPORT_MODULE_H
#include "SparseCore"
#include "src/Core/util/DisableStupidWarnings.h"
extern "C" {
#include <umfpack.h>
}
namespace Eigen {
/** \ingroup Support_modules
* \defgroup UmfPackSupport_Module UmfPackSupport module
*
*
*
*
* \code
* #include <Eigen/UmfPackSupport>
* \endcode
*/
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/UmfPackSupport/UmfPackSupport.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_UMFPACKSUPPORT_MODULE_H

View File

@@ -1,10 +1,9 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009 Keir Mierle <mierle@gmail.com>
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011 Timothy E. Holy <tim.holy@gmail.com >
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -32,15 +31,13 @@ namespace internal {
template<typename MatrixType, int UpLo> struct LDLT_Traits;
}
/** \ingroup Cholesky_Module
/** \ingroup cholesky_Module
*
* \class LDLT
*
* \brief Robust Cholesky decomposition of a matrix with pivoting
*
* \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
* \param UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
* The other triangular part won't be read.
*
* Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
* matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L
@@ -51,10 +48,14 @@ template<typename MatrixType, int UpLo> struct LDLT_Traits;
* on D also stabilizes the computation.
*
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
* decomposition to determine whether a system of equations has a solution.
* decomposition to determine whether a system of equations has a solution.
*
* \sa MatrixBase::ldlt(), class LLT
*/
/* THIS PART OF THE DOX IS CURRENTLY DISABLED BECAUSE INACCURATE BECAUSE OF BUG IN THE DECOMPOSITION CODE
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
* the strict lower part does not have to store correct values.
*/
template<typename _MatrixType, int _UpLo> class LDLT
{
public:
@@ -97,11 +98,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
m_isInitialized(false)
{}
/** \brief Constructor with decomposition
*
* This calculates the decomposition for the input \a matrix.
* \sa LDLT(Index size)
*/
LDLT(const MatrixType& matrix)
: m_matrix(matrix.rows(), matrix.cols()),
m_transpositions(matrix.rows()),
@@ -111,14 +107,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
compute(matrix);
}
/** Clear any existing decomposition
* \sa rankUpdate(w,sigma)
*/
void setZero()
{
m_isInitialized = false;
}
/** \returns a view of the upper triangular matrix U */
inline typename Traits::MatrixU matrixU() const
{
@@ -142,14 +130,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
}
/** \returns the coefficients of the diagonal matrix D */
inline Diagonal<const MatrixType> vectorD() const
inline Diagonal<const MatrixType> vectorD(void) const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_matrix.diagonal();
}
/** \returns true if the matrix is positive (semidefinite) */
inline bool isPositive() const
inline bool isPositive(void) const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_sign == 1;
@@ -170,19 +158,10 @@ template<typename _MatrixType, int _UpLo> class LDLT
}
/** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
*
* This function also supports in-place solves using the syntax <tt>x = decompositionObject.solve(x)</tt> .
*
* \note_about_checking_solutions
*
* More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$
* by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$,
* \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
* \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
* computes the least-square solution of \f$ A x = b \f$ if \f$ A \f$ is singular.
*
* \sa MatrixBase::ldlt()
* \sa solveInPlace(), MatrixBase::ldlt()
*/
template<typename Rhs>
inline const internal::solve_retval<LDLT, Rhs>
@@ -208,9 +187,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
LDLT& compute(const MatrixType& matrix);
template <typename Derived>
LDLT& rankUpdate(const MatrixBase<Derived>& w,RealScalar alpha=1);
/** \returns the internal LDLT decomposition matrix
*
* TODO: document the storage layout
@@ -226,17 +202,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
/** \brief Reports whether previous computation was successful.
*
* \returns \c Success if computation was successful,
* \c NumericalIssue if the matrix appears to be negative.
*/
ComputationInfo info() const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return Success;
}
protected:
/** \internal
@@ -275,7 +240,7 @@ template<> struct ldlt_inplace<Lower>
return true;
}
RealScalar cutoff(0), biggest_in_corner;
RealScalar cutoff = 0, biggest_in_corner;
for (Index k = 0; k < size; ++k)
{
@@ -343,61 +308,6 @@ template<> struct ldlt_inplace<Lower>
return true;
}
// In-place rank-1 update of an existing LDLT factorization stored in \a mat.
//
// Reference for the algorithm: Davis and Hager, "Multiple Rank
// Modifications of a Sparse Cholesky Factorization" (Algorithm 1)
// Trivial rearrangements of their computations (Timothy E. Holy)
// allow their algorithm to work for rank-1 updates even if the
// original matrix is not of full rank.
// Here only rank-1 updates are implemented, to reduce the
// requirement for intermediate storage and improve accuracy
//
// \param mat   square matrix holding the factorization: D is read from and written to
//              the diagonal, the columns of L below the diagonal are updated.
// \param w     update vector of the same size as \a mat; it is used as workspace and
//              is overwritten during the update.
// \param sigma weight of the update (defaults to 1).
// \returns always true.
template<typename MatrixType, typename WDerived>
static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, typename MatrixType::RealScalar sigma=1)
{
using internal::isfinite;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
const Index size = mat.rows();
eigen_assert(mat.cols() == size && w.size()==size);
// Running scalar of the recurrence; starts at 1 and grows by swj2/dj at each column.
RealScalar alpha = 1;
// Apply the update
for (Index j = 0; j < size; j++)
{
// Check for termination due to an original decomposition of low-rank
// (alpha becomes non-finite once a previous step divided by dj == 0)
if (!isfinite(alpha))
break;
// Update the diagonal terms
RealScalar dj = real(mat.coeff(j,j));
Scalar wj = w.coeff(j);
RealScalar swj2 = sigma*abs2(wj);
// gamma must be computed with the values of dj and alpha from *before* this step's update
RealScalar gamma = dj*alpha + swj2;
mat.coeffRef(j,j) += swj2/alpha;
alpha += swj2/dj;
// Update the terms of L
// rs is the number of entries below the diagonal in column j
Index rs = size-j-1;
// w is modified first; the column of L is then corrected using the updated w
w.tail(rs) -= wj * mat.col(j).tail(rs);
// skip the correction when gamma is zero to avoid dividing by zero
if(gamma != 0)
mat.col(j).tail(rs) += (sigma*conj(wj)/gamma)*w.tail(rs);
}
return true;
}
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, typename MatrixType::RealScalar sigma=1)
{
// Apply the permutation to the input w
tmp = transpositions * w;
return ldlt_inplace<Lower>::updateInPlace(mat,tmp,sigma);
}
};
template<> struct ldlt_inplace<Upper>
@@ -408,29 +318,22 @@ template<> struct ldlt_inplace<Upper>
Transpose<MatrixType> matt(mat);
return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
}
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, typename MatrixType::RealScalar sigma=1)
{
Transpose<MatrixType> matt(mat);
return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);
}
};
template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
{
typedef TriangularView<const MatrixType, UnitLower> MatrixL;
typedef TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m; }
static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
typedef TriangularView<MatrixType, UnitLower> MatrixL;
typedef TriangularView<typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m; }
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
};
template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
{
typedef TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
typedef TriangularView<const MatrixType, UnitUpper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
static inline MatrixU getU(const MatrixType& m) { return m; }
typedef TriangularView<typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
typedef TriangularView<MatrixType, UnitUpper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
inline static MatrixU getU(const MatrixType& m) { return m; }
};
} // end namespace internal
@@ -455,37 +358,6 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
return *this;
}
/** Update the LDLT decomposition: given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T.
  * \param w a vector to be incorporated into the decomposition.
  * \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1.
  *
  * If the decomposition has not been initialized yet, it is first set up as the
  * decomposition of a zero matrix of size w.rows() with identity transpositions,
  * and the update is then applied to that.
  *
  * \returns a reference to *this
  *
  * \sa setZero()
  */
template<typename MatrixType, int _UpLo>
template<typename Derived>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w,typename NumTraits<typename MatrixType::Scalar>::Real sigma)
{
  const Index size = w.rows();
  if (m_isInitialized)
  {
    // An existing decomposition can only absorb a vector of matching size.
    eigen_assert(m_matrix.rows()==size);
  }
  else
  {
    // Start from the decomposition of the zero matrix.
    m_matrix.resize(size,size);
    m_matrix.setZero();
    // Identity transpositions: entry i swaps i with itself.
    m_transpositions.resize(size);
    for (Index i = 0; i < size; i++)
      m_transpositions.coeffRef(i) = i;
    m_temporary.resize(size);
    // Record only the sign of sigma. Storing sigma itself would keep its
    // magnitude (and would truncate a fractional value such as 0.5 to 0 if
    // m_sign is an integer).
    m_sign = sigma>=0 ? 1 : -1;
    m_isInitialized = true;
  }

  internal::ldlt_inplace<UpLo>::update(m_matrix, m_transpositions, m_temporary, w, sigma);

  return *this;
}
namespace internal {
template<typename _MatrixType, int _UpLo, typename Rhs>
struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
@@ -504,21 +376,7 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
dec().matrixL().solveInPlace(dst);
// dst = D^-1 (L^-1 P b)
// more precisely, use pseudo-inverse of D (see bug 241)
using std::abs;
using std::max;
typedef typename LDLTType::MatrixType MatrixType;
typedef typename LDLTType::Scalar Scalar;
typedef typename LDLTType::RealScalar RealScalar;
const Diagonal<const MatrixType> vectorD = dec().vectorD();
RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits<Scalar>::epsilon(),
RealScalar(1) / NumTraits<RealScalar>::highest()); // motivated by LAPACK's xGELSS
for (Index i = 0; i < vectorD.size(); ++i) {
if(abs(vectorD(i)) > tolerance)
dst.row(i) /= vectorD(i);
else
dst.row(i).setZero();
}
dst = dec().vectorD().asDiagonal().inverse() * dst;
// dst = L^-T (D^-1 L^-1 P b)
dec().matrixU().solveInPlace(dst);

View File

@@ -29,15 +29,13 @@ namespace internal{
template<typename MatrixType, int UpLo> struct LLT_Traits;
}
/** \ingroup Cholesky_Module
/** \ingroup cholesky_Module
*
* \class LLT
*
* \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
*
* \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
* \param UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
* The other triangular part won't be read.
*
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
* matrix A such that A = LL^* = U^*U, where L is lower triangular.
@@ -51,9 +49,6 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
* use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations
* has a solution.
*
* Example: \include LLT_example.cpp
* Output: \verbinclude LLT_example.out
*
* \sa MatrixBase::llt(), class LDLT
*/
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
@@ -183,9 +178,6 @@ template<typename _MatrixType, int _UpLo> class LLT
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
template<typename VectorType>
LLT& rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
protected:
/** \internal
* Used to compute and store L
@@ -198,15 +190,16 @@ template<typename _MatrixType, int _UpLo> class LLT
namespace internal {
template<typename Scalar, int UpLo> struct llt_inplace;
template<int UpLo> struct llt_inplace;
template<typename Scalar> struct llt_inplace<Scalar, Lower>
template<> struct llt_inplace<Lower>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename MatrixType>
static typename MatrixType::Index unblocked(MatrixType& mat)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
eigen_assert(mat.rows()==mat.cols());
const Index size = mat.rows();
@@ -240,7 +233,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
Index blockSize = size/8;
blockSize = (blockSize/16)*16;
blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128));
blockSize = std::min(std::max(blockSize,Index(8)), Index(128));
for (Index k=0; k<size; k+=blockSize)
{
@@ -248,7 +241,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
// A00 | - | -
// lu = A10 | A11 | -
// A20 | A21 | A22
Index bs = (std::min)(blockSize, size-k);
Index bs = std::min(blockSize, size-k);
Index rs = size - k - bs;
Block<MatrixType,Dynamic,Dynamic> A11(m,k, k, bs,bs);
Block<MatrixType,Dynamic,Dynamic> A21(m,k+bs,k, rs,bs);
@@ -261,133 +254,55 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
}
return -1;
}
/** \internal
  * In-place rank-one update (sigma > 0) or downdate (sigma <= 0) of the lower
  * triangular factor stored in \a mat, using \a vec scaled by \a sigma.
  *
  * \returns -1 on success; otherwise the index j of the column at which the
  *          updated diagonal term became non-positive. Only the sigma <= 0
  *          path can report such a failure.
  */
template<typename MatrixType, typename VectorType>
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
{
  typedef typename MatrixType::Index Index;
  typedef typename MatrixType::ColXpr ColXpr;
  typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
  typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
  typedef Matrix<Scalar,Dynamic,1> TempVectorType;
  typedef typename TempVectorType::SegmentReturnType TempVecSegment;

  // Sizes and counters use the matrix's own Index type; plain 'int' would
  // narrow it when Index is wider.
  const Index n = mat.cols();
  eigen_assert(mat.rows()==n && vec.size()==n);

  TempVectorType temp;

  if(sigma>0)
  {
    // This version is based on Givens rotations.
    // It is faster than the other one below, but only works for updates,
    // i.e., for sigma > 0
    temp = sqrt(sigma) * vec;

    for(Index i=0; i<n; ++i)
    {
      // Rotation chosen from the diagonal entry and -temp(i); makeGivens also
      // writes its third-argument output back into mat(i,i).
      JacobiRotation<Scalar> g;
      g.makeGivens(mat(i,i), -temp(i), &mat(i,i));

      // Apply the same rotation to the remaining rs entries of column i and temp.
      const Index rs = n-i-1;
      if(rs>0)
      {
        ColXprSegment x(mat.col(i).tail(rs));
        TempVecSegment y(temp.tail(rs));
        apply_rotation_in_the_plane(x, y, g);
      }
    }
  }
  else
  {
    // General path (also used for downdates): processes one column at a time
    // and bails out as soon as the new squared diagonal is not positive.
    temp = vec;
    RealScalar beta = 1;
    for(Index j=0; j<n; ++j)
    {
      RealScalar Ljj = real(mat.coeff(j,j));
      RealScalar dj = abs2(Ljj);
      Scalar wj = temp.coeff(j);
      RealScalar swj2 = sigma*abs2(wj);
      RealScalar gamma = dj*beta + swj2;

      // x is the square of the new diagonal entry; it must stay positive.
      RealScalar x = dj + swj2/beta;
      if (x<=RealScalar(0))
        return j;
      RealScalar nLjj = sqrt(x);
      mat.coeffRef(j,j) = nLjj;
      beta += swj2/dj;

      // Update the terms of L
      Index rs = n-j-1;
      if(rs)
      {
        temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);
        if(gamma != 0)
          mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*conj(wj)/gamma)*temp.tail(rs);
      }
    }
  }
  return -1;
}
};
template<typename Scalar> struct llt_inplace<Scalar, Upper>
template<> struct llt_inplace<Upper>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename MatrixType>
static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat)
{
Transpose<MatrixType> matt(mat);
return llt_inplace<Scalar, Lower>::unblocked(matt);
return llt_inplace<Lower>::unblocked(matt);
}
template<typename MatrixType>
static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat)
{
Transpose<MatrixType> matt(mat);
return llt_inplace<Scalar, Lower>::blocked(matt);
}
template<typename MatrixType, typename VectorType>
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
{
Transpose<MatrixType> matt(mat);
return llt_inplace<Scalar, Lower>::rankUpdate(matt, vec.conjugate(), sigma);
return llt_inplace<Lower>::blocked(matt);
}
};
template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
{
typedef TriangularView<const MatrixType, Lower> MatrixL;
typedef TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m; }
static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
typedef TriangularView<MatrixType, Lower> MatrixL;
typedef TriangularView<typename MatrixType::AdjointReturnType, Upper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m; }
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
static bool inplace_decomposition(MatrixType& m)
{ return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }
{ return llt_inplace<Lower>::blocked(m)==-1; }
};
template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
{
typedef TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
typedef TriangularView<const MatrixType, Upper> MatrixU;
static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
static inline MatrixU getU(const MatrixType& m) { return m; }
typedef TriangularView<typename MatrixType::AdjointReturnType, Lower> MatrixL;
typedef TriangularView<MatrixType, Upper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
inline static MatrixU getU(const MatrixType& m) { return m; }
static bool inplace_decomposition(MatrixType& m)
{ return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }
{ return llt_inplace<Upper>::blocked(m)==-1; }
};
} // end namespace internal
/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
*
* \returns a reference to *this
*
* Example: \include TutorialLinAlgComputeTwice.cpp
* Output: \verbinclude TutorialLinAlgComputeTwice.out
* \returns a reference to *this
*/
template<typename MatrixType, int _UpLo>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
eigen_assert(a.rows()==a.cols());
assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);
m_matrix = a;
@@ -399,26 +314,6 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
return *this;
}
/** Applies a rank-one update (or downdate) to the current decomposition.
  * If A = LL^* holds before the call, then afterwards LL^* = A + sigma * v v^*,
  * where \a v must be a vector of the same dimension.
  *
  * The stored info becomes NumericalIssue when the in-place update returns a
  * non-negative value, and Success otherwise.
  *
  * \returns a reference to *this
  */
template<typename MatrixType, int _UpLo>
template<typename VectorType>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma)
{
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType);
  eigen_assert(v.size()==m_matrix.cols());
  eigen_assert(m_isInitialized);

  typedef internal::llt_inplace<typename MatrixType::Scalar, UpLo> InplaceImpl;

  // A non-negative return value signals a failure of the update.
  const bool updateFailed = InplaceImpl::rankUpdate(m_matrix,v,sigma) >= 0;
  m_info = updateFailed ? NumericalIssue : Success;

  return *this;
}
namespace internal {
template<typename _MatrixType, int UpLo, typename Rhs>
struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
@@ -489,4 +384,3 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
}
#endif // EIGEN_LLT_H

View File

@@ -1,123 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* LLt decomposition based on LAPACKE_?potrf function.
********************************************************************************
*/
#ifndef EIGEN_LLT_MKL_H
#define EIGEN_LLT_MKL_H
#include "Eigen/src/Core/util/MKL_support.h"
#include <iostream>
namespace internal {
template<typename Scalar> struct mkl_llt;
/* EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX)
 *
 * For the Eigen scalar type EIGTYPE this generates:
 *   - mkl_llt<EIGTYPE>::potrf(), a thin wrapper around LAPACKE_<MKLPREFIX>potrf
 *     operating in place on the matrix storage (cast to MKLTYPE*);
 *   - llt_inplace<EIGTYPE, Lower/Upper> specializations whose blocked() forwards
 *     to that wrapper with uplo 'L' / 'U'.
 *
 * potrf() returns -1 on success. The callers of llt_inplace<>::blocked() test
 * its result with "== -1", so returning Success (0) would make every
 * factorization look like a failure. On failure it returns info-1 for a
 * positive LAPACK info, and the matrix size for a negative one.
 *
 * NOTE(review): rankUpdate() here takes no sigma argument and returns void,
 * unlike the generic llt_inplace<>::rankUpdate in LLT.h -- confirm that callers
 * passing sigma are not expected to reach these specializations.
 */
#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \
template<> struct mkl_llt<EIGTYPE> \
{ \
  template<typename MatrixType> \
  static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \
  { \
    lapack_int matrix_order; \
    lapack_int size, lda, info, StorageOrder; \
    EIGTYPE* a; \
    eigen_assert(m.rows()==m.cols()); \
    /* Set up parameters for ?potrf */ \
    size = m.rows(); \
    StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
    matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
    a = &(m.coeffRef(0,0)); \
    lda = m.outerStride(); \
\
    info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
    /* Translate LAPACK's info into the llt_inplace convention: */ \
    /* -1 on success, otherwise a non-negative index.           */ \
    info = (info==0) ? -1 : info>0 ? info-1 : size; \
    return info; \
  } \
}; \
template<> struct llt_inplace<EIGTYPE, Lower> \
{ \
  template<typename MatrixType> \
  static typename MatrixType::Index blocked(MatrixType& m) \
  { \
    return mkl_llt<EIGTYPE>::potrf(m, 'L'); \
  } \
  template<typename MatrixType, typename VectorType> \
  static void rankUpdate(MatrixType& mat, const VectorType& vec) \
  { \
    typedef typename MatrixType::ColXpr ColXpr; \
    typedef typename internal::remove_all<ColXpr>::type ColXprCleaned; \
    typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; \
    typedef typename MatrixType::Scalar Scalar; \
    typedef Matrix<Scalar,Dynamic,1> TempVectorType; \
    typedef typename TempVectorType::SegmentReturnType TempVecSegment; \
\
    int n = mat.cols(); \
    eigen_assert(mat.rows()==n && vec.size()==n); \
    TempVectorType temp(vec); \
\
    for(int i=0; i<n; ++i) \
    { \
      JacobiRotation<Scalar> g; \
      g.makeGivens(mat(i,i), -temp(i), &mat(i,i)); \
\
      int rs = n-i-1; \
      if(rs>0) \
      { \
        ColXprSegment x(mat.col(i).tail(rs)); \
        TempVecSegment y(temp.tail(rs)); \
        apply_rotation_in_the_plane(x, y, g); \
      } \
    } \
  } \
}; \
template<> struct llt_inplace<EIGTYPE, Upper> \
{ \
  template<typename MatrixType> \
  static typename MatrixType::Index blocked(MatrixType& m) \
  { \
    return mkl_llt<EIGTYPE>::potrf(m, 'U'); \
  } \
  template<typename MatrixType, typename VectorType> \
  static void rankUpdate(MatrixType& mat, const VectorType& vec) \
  { \
    /* Upper storage: work on the transposed view with the conjugated vector. */ \
    Transpose<MatrixType> matt(mat); \
    return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate()); \
  } \
};
EIGEN_MKL_LLT(double, double, d)
EIGEN_MKL_LLT(float, float, s)
EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z)
EIGEN_MKL_LLT(scomplex, MKL_Complex8, c)
}
#endif // EIGEN_LLT_MKL_H

View File

@@ -1,6 +0,0 @@
# Collect every header matching *.h and install the set under
# Eigen/src/CholmodSupport as part of the Devel component.
file(GLOB Eigen_CholmodSupport_SRCS "*.h")

install(
  FILES ${Eigen_CholmodSupport_SRCS}
  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport
  COMPONENT Devel
)

View File

@@ -68,8 +68,10 @@ class Array
friend struct internal::conservative_resize_like_impl;
using Base::m_storage;
public:
enum { NeedsToAlign = (!(Options&DontAlign))
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
using Base::base;
using Base::coeff;

View File

@@ -159,7 +159,7 @@ template<typename Derived> class ArrayBase
/** \returns an \link MatrixBase Matrix \endlink expression of this array
* \sa MatrixBase::array() */
MatrixWrapper<Derived> matrix() { return derived(); }
const MatrixWrapper<const Derived> matrix() const { return derived(); }
const MatrixWrapper<Derived> matrix() const { return derived(); }
// template<typename Dest>
// inline void evalTo(Dest& dst) const { dst = matrix(); }
@@ -174,10 +174,10 @@ template<typename Derived> class ArrayBase
protected:
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
};
/** replaces \c *this by \c *this - \a other.

View File

@@ -53,25 +53,16 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
inline ArrayWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
inline Index rows() const { return m_expression.rows(); }
inline Index cols() const { return m_expression.cols(); }
inline Index outerStride() const { return m_expression.outerStride(); }
inline Index innerStride() const { return m_expression.innerStride(); }
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
inline const Scalar* data() const { return m_expression.data(); }
inline CoeffReturnType coeff(Index row, Index col) const
inline const CoeffReturnType coeff(Index row, Index col) const
{
return m_expression.coeff(row, col);
}
@@ -86,7 +77,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(row, col);
}
inline CoeffReturnType coeff(Index index) const
inline const CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
@@ -128,14 +119,8 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
template<typename Dest>
inline void evalTo(Dest& dst) const { dst = m_expression; }
const typename internal::remove_all<NestedExpressionType>::type&
nestedExpression() const
{
return m_expression;
}
protected:
NestedExpressionType m_expression;
const NestedExpressionType m_expression;
};
/** \class MatrixWrapper
@@ -166,25 +151,16 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
inline MatrixWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
inline Index rows() const { return m_expression.rows(); }
inline Index cols() const { return m_expression.cols(); }
inline Index outerStride() const { return m_expression.outerStride(); }
inline Index innerStride() const { return m_expression.innerStride(); }
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
inline const Scalar* data() const { return m_expression.data(); }
inline CoeffReturnType coeff(Index row, Index col) const
inline const CoeffReturnType coeff(Index row, Index col) const
{
return m_expression.coeff(row, col);
}
@@ -199,7 +175,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
return m_expression.derived().coeffRef(row, col);
}
inline CoeffReturnType coeff(Index index) const
inline const CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
@@ -238,14 +214,8 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
}
const typename internal::remove_all<NestedExpressionType>::type&
nestedExpression() const
{
return m_expression;
}
protected:
NestedExpressionType m_expression;
const NestedExpressionType m_expression;
};
#endif // EIGEN_ARRAYWRAPPER_H

View File

@@ -152,7 +152,7 @@ struct assign_DefaultTraversal_CompleteUnrolling
inner = Index % Derived1::InnerSizeAtCompileTime
};
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeffByOuterInner(outer, inner, src);
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
@@ -162,13 +162,13 @@ struct assign_DefaultTraversal_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_DefaultTraversal_InnerUnrolling
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
{
dst.copyCoeffByOuterInner(outer, Index, src);
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
@@ -178,7 +178,7 @@ struct assign_DefaultTraversal_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
};
/***********************
@@ -188,7 +188,7 @@ struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_LinearTraversal_CompleteUnrolling
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeff(Index, src);
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
@@ -198,7 +198,7 @@ struct assign_LinearTraversal_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};
/**************************
@@ -214,7 +214,7 @@ struct assign_innervec_CompleteUnrolling
JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
assign_innervec_CompleteUnrolling<Derived1, Derived2,
@@ -225,13 +225,13 @@ struct assign_innervec_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_innervec_InnerUnrolling
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
assign_innervec_InnerUnrolling<Derived1, Derived2,
@@ -242,7 +242,7 @@ struct assign_innervec_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
};
/***************************************************************************
@@ -251,25 +251,24 @@ struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2,
int Traversal = assign_traits<Derived1, Derived2>::Traversal,
int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
int Version = Specialized>
int Unrolling = assign_traits<Derived1, Derived2>::Unrolling>
struct assign_impl;
/************************
*** Default traversal ***
************************/
template<typename Derived1, typename Derived2, int Unrolling, int Version>
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
template<typename Derived1, typename Derived2, int Unrolling>
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
{
static inline void run(Derived1 &, const Derived2 &) { }
inline static void run(Derived1 &, const Derived2 &) { }
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
@@ -279,21 +278,21 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
{
typedef typename Derived1::Index Index;
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
@@ -306,11 +305,11 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version
*** Linear traversal ***
***********************/
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
for(Index i = 0; i < size; ++i)
@@ -318,10 +317,10 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
@@ -332,11 +331,11 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Versi
*** Inner vectorization ***
**************************/
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
@@ -347,21 +346,21 @@ struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Ve
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
{
typedef typename Derived1::Index Index;
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
@@ -399,11 +398,11 @@ struct unaligned_assign_impl<false>
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{
typedef typename Derived1::Index Index;
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
@@ -413,7 +412,7 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, V
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: internal::first_aligned(&dst.coeffRef(0), size);
: first_aligned(&dst.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
@@ -427,11 +426,11 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, V
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename Derived1::Index Index;
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
enum { size = Derived1::SizeAtCompileTime,
packetSize = packet_traits<typename Derived1::Scalar>::size,
@@ -446,11 +445,11 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnroll
*** Slice vectorization ***
***************************/
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
template<typename Derived1, typename Derived2>
struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
enum {
@@ -464,7 +463,7 @@ struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Ve
const Index outerSize = dst.outerSize();
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
: internal::first_aligned(&dst.coeffRef(0,0), innerSize);
: first_aligned(&dst.coeffRef(0,0), innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
@@ -532,19 +531,19 @@ struct assign_selector;
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,false,false> {
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,true,false> {
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,false,true> {
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,true,true> {
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
};
} // end namespace internal

View File

@@ -1,217 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
********************************************************************************
*/
#ifndef EIGEN_ASSIGN_VML_H
#define EIGEN_ASSIGN_VML_H
namespace internal {
/** \internal
  * Trait telling whether a VML kernel is available for the scalar functor \a Op.
  *
  * This primary template answers "no" (IsSupported == 0). The
  * EIGEN_MKL_VML_DECLARE_* macros further down in this file emit full
  * specializations that set IsSupported to 1 and provide a static run()
  * forwarding to the corresponding VML routine.
  */
template<typename Op> struct vml_call
{ enum { IsSupported = 0 }; };
/** \internal
  * Compile-time decision of whether, and how, the assignment
  * dst = UnaryOp(src) can be handed to a VML kernel.
  *
  * The only public result is \c Traversal:
  *  - LinearVectorizedTraversal when the whole expression can be processed in one call,
  *  - InnerVectorizedTraversal when VML is usable one inner vector at a time,
  *  - DefaultTraversal otherwise.
  */
template<typename Dst, typename Src, typename UnaryOp>
class vml_assign_traits
{
private:
enum {
// Non-zero when the respective expression carries DirectAccessBit.
DstHasDirectAccess = Dst::Flags & DirectAccessBit,
SrcHasDirectAccess = Src::Flags & DirectAccessBit,
StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
// Compile-time length of one inner vector of Dst.
// NOTE(review): InnerSize is not referenced anywhere else in this class.
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
: int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
: int(Dst::RowsAtCompileTime),
// Same selection as InnerSize, but using the Max*AtCompileTime bounds.
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
: int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
// NOTE(review): despite its name this is initialised from Dst::SizeAtCompileTime,
// not Dst::MaxSizeAtCompileTime -- confirm this is intended.
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
// Structural requirements: a VML kernel exists for UnaryOp, both sides share the
// storage order, both have direct access, and both have a compile-time inner stride of 1.
MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
&& Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
// Additionally requires LinearAccessBit on both sides.
MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
// Size compared against the threshold: total size if linearizable, inner size otherwise.
VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
// Dynamic sizes always qualify; fixed sizes must reach EIGEN_MKL_VML_THRESHOLD
// (defined outside this file).
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
MayEnableVml = MightEnableVml && LargeEnough,
MayLinearize = MayEnableVml && MightLinearize
};
public:
enum {
Traversal = MayLinearize ? LinearVectorizedTraversal
: MayEnableVml ? InnerVectorizedTraversal
: DefaultTraversal
};
};
/** \internal
  * Dispatcher for dst = UnaryOp(src) assignments.
  *
  * \c VmlTraversal defaults to the value computed by vml_assign_traits. This primary
  * template is the fallback used whenever no dedicated specialization matches
  * \c VmlTraversal: it simply inherits run() from
  * assign_impl<..., Traversal, Unrolling, BuiltIn>.
  * NOTE(review): BuiltIn is presumably the tag of Eigen's regular (non-VML)
  * implementation -- it is declared outside this file.
  */
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
struct vml_assign_impl
: assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
{
};
/** \internal
  * VML path selected when vml_assign_traits yields InnerVectorizedTraversal:
  * the kernel is invoked once per inner vector (one row or one column,
  * depending on the storage order) of the destination.
  */
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
{
  typedef typename Derived1::Scalar Scalar;
  typedef typename Derived1::Index Index;

  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
  {
    // Should VML ever need to be bypassed at runtime, the regular path is:
    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    const Index sliceLength = dst.innerSize();
    const Index sliceCount = dst.outerSize();

    Index outer = 0;
    while(outer < sliceCount)
    {
      // Address of the first coefficient of the current inner vector of the source...
      const Scalar* from;
      if(src.IsRowMajor)
        from = &(src.nestedExpression().coeffRef(outer, 0));
      else
        from = &(src.nestedExpression().coeffRef(0, outer));

      // ...and of the destination.
      Scalar* to;
      if(dst.IsRowMajor)
        to = &(dst.coeffRef(outer, 0));
      else
        to = &(dst.coeffRef(0, outer));

      vml_call<UnaryOp>::run(src.functor(), sliceLength, from, to);
      ++outer;
    }
  }
};
/** \internal
  * VML path selected when vml_assign_traits yields LinearVectorizedTraversal:
  * source and destination are each handed to the kernel as one array of
  * dst.size() coefficients, in a single call.
  */
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
{
  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
  {
    // Should VML ever need to be bypassed at runtime, the regular path is:
    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    typedef typename Derived1::Scalar Scalar;

    const Scalar* from = src.nestedExpression().data();
    Scalar* to = dst.data();
    vml_call<UnaryOp>::run(src.functor(), dst.size(), from, to);
  }
};
// Macros
//
// EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING)
// Emits the assign_impl<..., TRAVERSAL, UNROLLING, Specialized> partial specialization
// for destinations assigned from a CwiseUnaryOp expression. Its run() forwards to
// vml_assign_impl, which either calls VML or falls back to the BuiltIn assign_impl.
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
template<typename Derived1, typename Derived2, typename UnaryOp> \
struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
} \
};
// One Specialized assign_impl per (traversal, unrolling) pair listed below; each of them
// routes unary-expression assignments through vml_assign_impl.
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
// VML accuracy mode passed to the vm* routines: VML_LA (low accuracy) only when
// EIGEN_FAST_MATH is defined and equal to 1, VML_HA (high accuracy) in every other case.
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define EIGEN_MKL_VML_MODE VML_HA
#else
#define EIGEN_MKL_VML_MODE VML_LA
#endif
// EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)
// Specializes vml_call for scalar_<EIGENOP>_op<EIGENTYPE>: marks the functor as supported
// and forwards run() to VMLOP(size, src, dst), reinterpreting the Eigen scalar pointers
// as VMLTYPE pointers. This variant does not pass an accuracy-mode argument.
// The functor argument carries no state needed by the call, so its name is commented
// out to keep the generated code free of unused-parameter warnings.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)     \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,    \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst);                           \
    }                                                                            \
  };
// EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)
// Same as EIGEN_MKL_VML_DECLARE_UNARY_CALL, but for VML routines that take the accuracy
// mode as a trailing argument: run() forwards to VMLOP(size, src, dst, EIGEN_MKL_VML_MODE).
// The functor argument carries no state needed by the call, so its name is commented
// out to keep the generated code free of unused-parameter warnings.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)  \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,    \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode);                  \
    }                                                                            \
  };
// EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)
// Specializes vml_call for a functor that raises every coefficient to one fixed exponent.
// The exponent is read from func.m_exponent. Unlike the unary macros above, VMLOP is
// called with every argument passed by address (size, exponent and accuracy mode included).
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
EIGENTYPE exponent = func.m_exponent; \
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \
(VMLTYPE*)dst, &vmlMode); \
} \
};
// Convenience wrappers declaring one vml_call specialization per scalar type.
// The VML routine name is built by prefixing VMLOP:
//   vs / vd   -> float / double                 (no accuracy-mode argument)
//   vc / vz   -> scomplex / dcomplex            (no accuracy-mode argument)
//   vms / vmd -> float / double                 (accuracy mode passed, "_LA" macros)
//   vmc / vmz -> scomplex / dcomplex            (accuracy mode passed, "_LA" macros)
// Complex scalars are handed to VML as MKL_Complex8 / MKL_Complex16.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
// Real and complex variants together.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
// Real and complex variants together, with accuracy mode.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
// Functors accelerated through VML. Each "_LA" line declares four specializations
// (float, double, scomplex, dcomplex) mapped to vms*/vmd*/vmc*/vmz* routines.
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
// abs is left disabled in the original source; the reason is not recorded here.
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
// square: real types only, through vsSqr/vdSqr (no accuracy-mode argument).
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
// pow with a constant exponent: explicit routine names, all four scalar types.
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
} // end namespace internal
#endif // EIGEN_ASSIGN_VML_H

View File

@@ -87,7 +87,7 @@ class BandMatrixBase : public EigenBase<Derived>
if (i<=supers())
{
start = supers()-i;
len = (std::min)(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
len = std::min(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
}
else if (i>=rows()-subs())
len = std::max<Index>(0,coeffs().rows() - (i + 1 - rows() + subs()));
@@ -96,11 +96,11 @@ class BandMatrixBase : public EigenBase<Derived>
/** \returns a vector expression of the main diagonal */
inline Block<CoefficientsType,1,SizeAtCompileTime> diagonal()
{ return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
{ return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,std::min(rows(),cols())); }
/** \returns a vector expression of the main diagonal (const version) */
inline const Block<const CoefficientsType,1,SizeAtCompileTime> diagonal() const
{ return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
{ return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,std::min(rows(),cols())); }
template<int Index> struct DiagonalIntReturnType {
enum {
@@ -122,13 +122,13 @@ class BandMatrixBase : public EigenBase<Derived>
/** \returns a vector expression of the \a N -th sub or super diagonal */
template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal()
{
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, std::max(0,N), 1, diagonalLength(N));
}
/** \returns a vector expression of the \a N -th sub or super diagonal */
template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const
{
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, std::max(0,N), 1, diagonalLength(N));
}
/** \returns a vector expression of the \a i -th sub or super diagonal */
@@ -166,7 +166,7 @@ class BandMatrixBase : public EigenBase<Derived>
protected:
inline Index diagonalLength(Index i) const
{ return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); }
{ return i<0 ? std::min(cols(),rows()+i) : std::min(rows(),cols()-i); }
};
/**
@@ -180,7 +180,7 @@ class BandMatrixBase : public EigenBase<Derived>
* \param Cols Number of columns, or \b Dynamic
* \param Supers Number of super diagonal
* \param Subs Number of sub diagonal
* \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
* \param _Options A combination of either \b RowMajor or \b ColMajor, and of \b SelfAdjoint
* The former controls \ref TopicStorageOrders "storage order", and defaults to
* column-major. The latter controls whether the matrix represents a selfadjoint
* matrix in which case either Supers of Subs have to be null.
@@ -284,7 +284,6 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
: m_coeffs(coeffs),
m_rows(rows), m_supers(supers), m_subs(subs)
{
EIGEN_UNUSED_VARIABLE(cols);
//internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
}

View File

@@ -94,7 +94,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess>
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
@@ -242,21 +242,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
inline Index outerStride() const;
#endif
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
{
return m_xpr;
}
Index startRow() const
{
return m_startRow.value();
}
Index startCol() const
{
return m_startCol.value();
}
protected:
const typename XprType::Nested m_xpr;
@@ -319,11 +304,6 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
init();
}
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
{
return m_xpr;
}
/** \sa MapBase::innerStride() */
inline Index innerStride() const
{
@@ -361,7 +341,7 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
: m_xpr.innerStride();
}
typename XprType::Nested m_xpr;
const typename XprType::Nested m_xpr;
int m_outerStride;
};

View File

@@ -35,7 +35,7 @@ struct all_unroller
row = (UnrollCount-1) % Derived::RowsAtCompileTime
};
static inline bool run(const Derived &mat)
inline static bool run(const Derived &mat)
{
return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
}
@@ -44,13 +44,13 @@ struct all_unroller
template<typename Derived>
struct all_unroller<Derived, 1>
{
static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
};
template<typename Derived>
struct all_unroller<Derived, Dynamic>
{
static inline bool run(const Derived &) { return false; }
inline static bool run(const Derived &) { return false; }
};
template<typename Derived, int UnrollCount>
@@ -61,7 +61,7 @@ struct any_unroller
row = (UnrollCount-1) % Derived::RowsAtCompileTime
};
static inline bool run(const Derived &mat)
inline static bool run(const Derived &mat)
{
return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
}
@@ -70,13 +70,13 @@ struct any_unroller
template<typename Derived>
struct any_unroller<Derived, 1>
{
static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
};
template<typename Derived>
struct any_unroller<Derived, Dynamic>
{
static inline bool run(const Derived &) { return false; }
inline static bool run(const Derived &) { return false; }
};
} // end namespace internal

View File

@@ -167,8 +167,8 @@ class CwiseBinaryOp : internal::no_assignment_operator,
const BinaryOp& functor() const { return m_functor; }
protected:
LhsNested m_lhs;
RhsNested m_rhs;
const LhsNested m_lhs;
const RhsNested m_rhs;
const BinaryOp m_functor;
};

View File

@@ -101,9 +101,6 @@ class CwiseNullaryOp : internal::no_assignment_operator,
return m_functor.packetOp(index);
}
/** \returns the functor representing the nullary operation */
const NullaryOp& functor() const { return m_functor; }
protected:
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
@@ -745,7 +742,7 @@ struct setIdentity_impl<Derived, true>
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
{
m.setZero();
const Index size = (std::min)(m.rows(), m.cols());
const Index size = std::min(m.rows(), m.cols());
for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
return m;
}

View File

@@ -95,7 +95,7 @@ class CwiseUnaryOp : internal::no_assignment_operator,
nestedExpression() { return m_xpr.const_cast_derived(); }
protected:
typename XprType::Nested m_xpr;
const typename XprType::Nested m_xpr;
const UnaryOp m_functor;
};

View File

@@ -97,7 +97,7 @@ class CwiseUnaryView : internal::no_assignment_operator,
protected:
// FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
typename internal::nested<MatrixType>::type m_matrix;
const typename internal::nested<MatrixType>::type m_matrix;
ViewOp m_functor;
};

View File

@@ -376,13 +376,12 @@ template<typename Derived> class DenseBase
inline Derived& operator*=(const Scalar& other);
inline Derived& operator/=(const Scalar& other);
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
/** \returns the matrix or vector obtained by evaluating this expression.
*
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
* a const reference, in order to avoid a useless copy.
*/
EIGEN_STRONG_INLINE EvalReturnType eval() const
EIGEN_STRONG_INLINE const typename internal::eval<Derived>::type eval() const
{
// Even though MSVC does not honor strong inlining when the return type
// is a dynamic matrix, we desperately need strong inlining for fixed

View File

@@ -35,7 +35,7 @@ template<typename T> struct add_const_on_value_type_if_arithmetic
/** \brief Base class providing read-only coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam #ReadOnlyAccessors Constant indicating read-only access
* \tparam ReadOnlyAccessors Constant indicating read-only access
*
* This class defines the \c operator() \c const function and friends, which can be used to read specific
* entries of a matrix or array.
@@ -212,7 +212,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit.
*
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@@ -239,7 +239,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit and the LinearAccessBit.
*
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@@ -275,7 +275,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
/** \brief Base class providing read/write coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam #WriteAccessors Constant indicating read/write access
* \tparam WriteAccessors Constant indicating read/write access
*
* This class defines the non-const \c operator() function and friends, which can be used to write specific
* entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
@@ -433,7 +433,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit.
*
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@@ -567,7 +567,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
/** \brief Base class providing direct read-only coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam #DirectAccessors Constant indicating direct access
* \tparam DirectAccessors Constant indicating direct access
*
* This class defines functions to work with strides which can be used to access entries directly. This class
* inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
@@ -637,7 +637,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
/** \brief Base class providing direct read/write coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam #DirectWriteAccessors Constant indicating direct access
* \tparam DirectAccessors Constant indicating direct access
*
* This class defines functions to work with strides which can be used to access entries directly. This class
* inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
@@ -710,16 +710,16 @@ namespace internal {
template<typename Derived, bool JustReturnZero>
struct first_aligned_impl
{
static inline typename Derived::Index run(const Derived&)
inline static typename Derived::Index run(const Derived&)
{ return 0; }
};
template<typename Derived>
struct first_aligned_impl<Derived, false>
{
static inline typename Derived::Index run(const Derived& m)
inline static typename Derived::Index run(const Derived& m)
{
return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
return first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
}
};
@@ -729,7 +729,7 @@ struct first_aligned_impl<Derived, false>
* documentation.
*/
template<typename Derived>
static inline typename Derived::Index first_aligned(const Derived& m)
inline static typename Derived::Index first_aligned(const Derived& m)
{
return first_aligned_impl
<Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>

View File

@@ -58,7 +58,7 @@ struct plain_array
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
eigen_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
&& "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox-devel/TopicUnalignedArrayAssert.html" \
"http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****");
#endif
@@ -104,8 +104,8 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
: m_data(internal::constructor_without_unaligned_array_assert()) {}
inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
static inline DenseIndex rows(void) {return _Rows;}
static inline DenseIndex cols(void) {return _Cols;}
inline static DenseIndex rows(void) {return _Rows;}
inline static DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
inline const T *data() const { return m_data.array; }
@@ -120,24 +120,14 @@ template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0
inline DenseStorage(internal::constructor_without_unaligned_array_assert) {}
inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
inline void swap(DenseStorage& ) {}
static inline DenseIndex rows(void) {return _Rows;}
static inline DenseIndex cols(void) {return _Cols;}
inline static DenseIndex rows(void) {return _Rows;}
inline static DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
inline const T *data() const { return 0; }
inline T *data() { return 0; }
};
// more specializations for null matrices; these are necessary to resolve ambiguities
template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };
template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };
template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };
// dynamic-size matrix with fixed-size storage
template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
{
@@ -251,7 +241,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
static inline DenseIndex rows(void) {return _Rows;}
inline static DenseIndex rows(void) {return _Rows;}
inline DenseIndex cols(void) const {return m_cols;}
inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex cols)
{
@@ -288,7 +278,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
inline DenseIndex rows(void) const {return m_rows;}
static inline DenseIndex cols(void) {return _Cols;}
inline static DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);

View File

@@ -2,7 +2,6 @@
// for linear algebra.
//
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -88,7 +87,7 @@ template<typename MatrixType, int DiagIndex> class Diagonal
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
inline Index rows() const
{ return m_index.value()<0 ? (std::min)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
{ return m_index.value()<0 ? std::min(m_matrix.cols(),m_matrix.rows()+m_index.value()) : std::min(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
inline Index cols() const { return 1; }
@@ -102,15 +101,6 @@ template<typename MatrixType, int DiagIndex> class Diagonal
return 0;
}
typedef typename internal::conditional<
internal::is_lvalue<MatrixType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
inline Scalar& coeffRef(Index row, Index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
@@ -143,19 +133,8 @@ template<typename MatrixType, int DiagIndex> class Diagonal
return m_matrix.coeff(index+rowOffset(), index+colOffset());
}
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
return m_matrix;
}
int index() const
{
return m_index.value();
}
protected:
typename MatrixType::Nested m_matrix;
const typename MatrixType::Nested m_matrix;
const internal::variable_if_dynamic<Index, DiagIndex> m_index;
private:

View File

@@ -72,7 +72,7 @@ class DiagonalBase : public EigenBase<Derived>
const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
operator*(const MatrixBase<MatrixDerived> &matrix) const;
inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
inline const DiagonalWrapper<CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
inverse() const
{
return diagonal().cwiseInverse();
@@ -251,13 +251,13 @@ class DiagonalWrapper
#endif
/** Constructor from expression of diagonal coefficients to wrap. */
inline DiagonalWrapper(DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}
inline DiagonalWrapper(const DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}
/** \returns a const reference to the wrapped expression of diagonal coefficients. */
const DiagonalVectorType& diagonal() const { return m_diagonal; }
protected:
typename DiagonalVectorType::Nested m_diagonal;
const typename DiagonalVectorType::Nested m_diagonal;
};
/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients

View File

@@ -107,8 +107,8 @@ class DiagonalProduct : internal::no_assignment_operator,
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
}
typename MatrixType::Nested m_matrix;
typename DiagonalType::Nested m_diagonal;
const typename MatrixType::Nested m_matrix;
const typename DiagonalType::Nested m_diagonal;
};
/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.

View File

@@ -116,9 +116,7 @@ MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
//---------- implementation of L2 norm and related functions ----------
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm.
* In both cases, it consists in the sum of the squares of all the matrix entries.
* For vectors, this is also equal to the dot product of \c *this with itself.
/** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself.
*
* \sa dot(), norm()
*/
@@ -128,9 +126,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
return internal::real((*this).cwiseAbs2().sum());
}
/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
* In both cases, it consists in the square root of the sum of the squares of all the matrix entries.
* For vectors, this is also equal to the square root of the dot product of \c *this with itself.
/** \returns the \em l2 norm of *this, i.e., for vectors, the square root of the dot product of *this with itself.
*
* \sa dot(), squaredNorm()
*/
@@ -176,7 +172,7 @@ template<typename Derived, int p>
struct lpNorm_selector
{
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
static inline RealScalar run(const MatrixBase<Derived>& m)
inline static RealScalar run(const MatrixBase<Derived>& m)
{
return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
}
@@ -185,7 +181,7 @@ struct lpNorm_selector
template<typename Derived>
struct lpNorm_selector<Derived, 1>
{
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.cwiseAbs().sum();
}
@@ -194,7 +190,7 @@ struct lpNorm_selector<Derived, 1>
template<typename Derived>
struct lpNorm_selector<Derived, 2>
{
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.norm();
}
@@ -203,7 +199,7 @@ struct lpNorm_selector<Derived, 2>
template<typename Derived>
struct lpNorm_selector<Derived, Infinity>
{
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.cwiseAbs().maxCoeff();
}

View File

@@ -116,7 +116,7 @@ struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
*/
template<typename Scalar> struct scalar_min_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); }
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::min(a, b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmin(a,b); }
@@ -139,7 +139,7 @@ struct functor_traits<scalar_min_op<Scalar> > {
*/
template<typename Scalar> struct scalar_max_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); }
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::max(a, b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmax(a,b); }
@@ -165,10 +165,8 @@ template<typename Scalar> struct scalar_hypot_op {
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
{
using std::max;
using std::min;
Scalar p = (max)(_x, _y);
Scalar q = (min)(_x, _y);
Scalar p = std::max(_x, _y);
Scalar q = std::min(_x, _y);
Scalar qp = q/p;
return p * sqrt(Scalar(1) + qp*qp);
}
@@ -220,38 +218,6 @@ struct functor_traits<scalar_quotient_op<Scalar> > {
};
};
/** \internal
* \brief Template functor to compute the and of two booleans
*
* \sa class CwiseBinaryOp, ArrayBase::operator&&
*/
struct scalar_boolean_and_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
};
template<> struct functor_traits<scalar_boolean_and_op> {
enum {
Cost = NumTraits<bool>::AddCost,
PacketAccess = false
};
};
/** \internal
* \brief Template functor to compute the or of two booleans
*
* \sa class CwiseBinaryOp, ArrayBase::operator||
*/
struct scalar_boolean_or_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
};
template<> struct functor_traits<scalar_boolean_or_op> {
enum {
Cost = NumTraits<bool>::AddCost,
PacketAccess = false
};
};
// unary functors:
/** \internal
@@ -639,7 +605,7 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
{
eigen_assert(col==0 || row==0);
return impl.packetOp(col + row);
return impl(col + row);
}
// This proxy object handles the actual required temporaries, the different
@@ -784,9 +750,9 @@ struct functor_traits<scalar_acos_op<Scalar> >
*/
template<typename Scalar> struct scalar_asin_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
  // Scalar path: arc sine of a. This must call asin (not acos), otherwise the
  // functor silently returns the arc cosine.
  inline const Scalar operator() (const Scalar& a) const { return asin(a); }
  typedef typename packet_traits<Scalar>::type Packet;
  // Packet path: must mirror the scalar path, hence pasin (not pacos).
  inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
};
template<typename Scalar>
struct functor_traits<scalar_asin_op<Scalar> >

View File

@@ -34,10 +34,9 @@ struct isApprox_selector
{
static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
{
using std::min;
typename internal::nested<Derived,2>::type nested(x);
typename internal::nested<OtherDerived,2>::type otherNested(y);
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
const typename internal::nested<Derived,2>::type nested(x);
const typename internal::nested<OtherDerived,2>::type otherNested(y);
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * std::min(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
}
};

View File

@@ -1,624 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_GENERAL_PRODUCT_H
#define EIGEN_GENERAL_PRODUCT_H
/** \class GeneralProduct
* \ingroup Core_Module
*
* \brief Expression of the product of two general matrices or vectors
*
* \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side
* \param ProductType the kind of product; defaults to internal::product_type<Lhs,Rhs>::value
*
* This class represents an expression of the product of two general matrices.
* We call a general matrix a dense matrix with full storage. For instance,
* this excludes triangular, selfadjoint, and sparse matrices.
* It is the return type of the operator* between general matrices. Its template
* arguments are determined automatically by ProductReturnType. Therefore,
* GeneralProduct should never be used directly. To determine the result type of a
* function which involves a matrix product, use ProductReturnType::Type.
*
* \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
class GeneralProduct;
// Size categories used by internal::product_size_category to classify each
// dimension of a product. A dimension of size one is represented by the plain
// value 1, which is why the enumerators start at 2.
enum {
Large = 2,
Small = 3
};
namespace internal {
template<int Rows, int Cols, int Depth> struct product_type_selector;
/** \internal
  * Classifies one dimension of a product as 1, Small or Large.
  *
  * A dimension is Large when its maximal size is Dynamic or when its size
  * reaches EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD. Otherwise it is 1 when the
  * size is exactly one, and Small in every remaining case.
  */
template<int Size, int MaxSize> struct product_size_category
{
  enum {
    is_large = (MaxSize == Dynamic) || (Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
    value    = is_large ? Large : (Size == 1 ? 1 : Small)
  };
};
/** \internal
* Computes, at compile time, the kind of product to use for \c Lhs * \c Rhs.
*
* Each of the three dimensions of the product (rows, columns and depth) is
* classified with product_size_category, and the resulting triple is mapped to
* a product kind by product_type_selector. The result is exposed as \c value.
*/
template<typename Lhs, typename Rhs> struct product_type
{
typedef typename remove_all<Lhs>::type _Lhs;
typedef typename remove_all<Rhs>::type _Rhs;
enum {
// Rows come from the left-hand side, columns from the right-hand side.
MaxRows = _Lhs::MaxRowsAtCompileTime,
Rows = _Lhs::RowsAtCompileTime,
MaxCols = _Rhs::MaxColsAtCompileTime,
Cols = _Rhs::ColsAtCompileTime,
// The depth is the contracted dimension: columns of the lhs / rows of the rhs,
// combined with EIGEN_SIZE_MIN_PREFER_FIXED.
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
_Rhs::MaxRowsAtCompileTime),
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
_Rhs::RowsAtCompileTime),
// Not referenced elsewhere in this struct.
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
};
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
// is to work around an internal compiler error with gcc 4.1 and 4.2.
private:
enum {
rows_select = product_size_category<Rows,MaxRows>::value,
cols_select = product_size_category<Cols,MaxCols>::value,
depth_select = product_size_category<Depth,MaxDepth>::value
};
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
public:
enum {
// The selected product kind (one of the values returned by product_type_selector).
value = selector::ret
};
#ifdef EIGEN_DEBUG_PRODUCT
// Dumps the sizes, the three size categories and the selected product kind
// through EIGEN_DEBUG_VAR. Only compiled when EIGEN_DEBUG_PRODUCT is defined.
static void debug()
{
EIGEN_DEBUG_VAR(Rows);
EIGEN_DEBUG_VAR(Cols);
EIGEN_DEBUG_VAR(Depth);
EIGEN_DEBUG_VAR(rows_select);
EIGEN_DEBUG_VAR(cols_select);
EIGEN_DEBUG_VAR(depth_select);
EIGEN_DEBUG_VAR(value);
}
#endif
};
/* The following specializations select the kind of product at compile time
* based on the three dimensions of the product, given in the order <Rows, Cols, Depth>.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types}.
* The primary template is only declared in this file, so a combination with no
* matching specialization would fail to compile. */
// FIXME I'm not sure the current mapping is the ideal one.
// Depth == 1 is an outer product by default; the full specializations with depth 1
// further below override this for the combinations they list.
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
// A 1 x 1 result is an inner product, whatever the depth.
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
// Explicit <1,1,1> case: both partial specializations above match it equally well.
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
// Depth 1 with a Small dimension: lazy coefficient-based product instead of OuterProduct.
template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
// Vector results: GemvProduct only when both the free dimension and the depth are Large.
template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
// Matrix results with at least one Large dimension: GemmProduct.
template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal
/** \class ProductReturnType
* \ingroup Core_Module
*
* \brief Helper class to get the correct and optimized returned type of operator*
*
* \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side
* \param ProductType the kind of product (see internal::product_type; the default
* argument, if any, is declared outside this file)
*
* This class defines the typename Type representing the optimized product expression
* between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
* is the recommended way to define the result type of a function returning an expression
* which involves a matrix product. The product expression classes themselves
* (e.g. GeneralProduct) should never be used directly.
*
* \sa class GeneralProduct, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType>
struct ProductReturnType
{
// TODO use the nested type to reduce instantiations ????
// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
};
// Specialization for CoeffBasedProductMode: the product is represented by a
// CoeffBasedProduct flagged with EvalBeforeAssigningBit | EvalBeforeNestingBit.
// Each operand is wrapped through internal::nested, parameterized by the matching
// dimension of the other operand and by the operand's plain matrix type.
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
{
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
};
// Specialization for LazyCoeffBasedProductMode: same operand nesting as the
// CoeffBasedProductMode case, but the CoeffBasedProduct is flagged with
// NestByRefBit only (no EvalBeforeAssigningBit / EvalBeforeNestingBit).
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
};
// Shorthand for ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>; it is the
// return type of MatrixBase::lazyProduct().
// It is a derived struct rather than a direct use of ProductReturnType as a
// workaround for sun CC.
template<typename Lhs, typename Rhs>
struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{};
/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/
// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
// product ends up to a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
namespace internal {
// The traits of an inner product are those of a 1x1 matrix whose scalar type is
// the product type of the two operand scalar types.
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
: traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
{};
}
/** \internal
* Inner (row-vector times column-vector) product.
*
* The product is a 1x1 Matrix whose single coefficient is computed eagerly in the
* constructor. It is implicitly convertible to the scalar type.
*/
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, InnerProduct>
: internal::no_assignment_operator,
public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
{
typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
public:
/** Computes the product immediately and stores it in coefficient (0,0).
* Statically asserts that both operands have the same RealScalar type. */
GeneralProduct(const Lhs& lhs, const Rhs& rhs)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
// Sum of the coefficient-wise product of lhs^T and rhs (no conjugation is applied here).
Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
}
/** Conversion to scalar */
operator const typename Base::Scalar() const {
return Base::coeff(0,0);
}
};
/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/
namespace internal {
// Performs the actual outer product; specialized on the storage order of the destination.
template<int StorageOrder> struct outer_product_selector;
// The traits of an outer product are inherited from its ProductBase.
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
{};
}
/** \internal
* Outer (column-vector times row-vector) product.
*
* The evaluation is delegated to internal::outer_product_selector, chosen
* according to the storage order of the destination.
*/
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
{
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
/** Stores the operands through ProductBase and statically asserts that both
* operands have the same RealScalar type. */
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
/** Adds \a alpha times this product to \a dest, traversing \a dest by rows if it
* has the RowMajorBit flag and by columns otherwise. */
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{
internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
}
};
namespace internal {
// Outer product for a column-major destination: the destination is updated one
// column at a time, dest.col(c) += (alpha * rhs(c)) * lhs.
template<> struct outer_product_selector<ColMajor> {
  template<typename ProductType, typename Dest>
  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename Dest::Index Index;
    // FIXME make sure lhs is sequentially stored
    // FIXME not very good if rhs is real and lhs complex while alpha is real too
    const Index columnCount = dest.cols();
    for (Index c = 0; c < columnCount; ++c)
    {
      dest.col(c) += (alpha * prod.rhs().coeff(c)) * prod.lhs();
    }
  }
};
// Outer product for a row-major destination: the destination is updated one
// row at a time, dest.row(r) += (alpha * lhs(r)) * rhs.
template<> struct outer_product_selector<RowMajor> {
  template<typename ProductType, typename Dest>
  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename Dest::Index Index;
    // FIXME make sure rhs is sequentially stored
    // FIXME not very good if lhs is real and rhs complex while alpha is real too
    const Index rowCount = dest.rows();
    for (Index r = 0; r < rowCount; ++r)
    {
      dest.row(r) += (alpha * prod.lhs().coeff(r)) * prod.rhs();
    }
  }
};
} // end namespace internal
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
/* According to the shape/flags of the matrix we have to distinguish 3 different cases:
* 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
* 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
* 3 - all other cases are handled using a simple loop along the outer-storage direction.
* Therefore we need a lower level meta selector.
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
*/
namespace internal {
// The traits of a matrix-vector product are inherited from its ProductBase.
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
{};
// Low-level selector performing the actual matrix-vector product. It is specialized on
// the side of the vector operand, the storage order of the matrix operand, and
// whether the matrix operand offers usable direct access.
template<int Side, int StorageOrder, bool BlasCompatible>
struct gemv_selector;
} // end namespace internal
/** \internal
* General matrix-vector (or vector-matrix) product.
*
* The evaluation is delegated to internal::gemv_selector.
*/
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemvProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
{
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
// The check for identical scalar types is intentionally disabled here:
// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
// Side of the vector operand: OnTheLeft when the lhs is a compile-time vector, OnTheRight otherwise.
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
// Type of the matrix operand, i.e. the operand that is not on the vector side.
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
/** Adds \a alpha times this product to \a dst. The sizes of \a dst must match
* those of the product (checked with eigen_assert). */
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
}
};
namespace internal {
// The vector is on the left => transposition
// The product v * M is rewritten as M^T * v^T, accumulated into the transposed
// destination, and forwarded to the OnTheRight selector with the opposite storage order.
template<int StorageOrder, bool BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
// Transposed view of the destination: writing to destT updates dest.
Transpose<Dest> destT(dest);
// Transposing the matrix operand swaps its storage order.
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
(prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
}
};
// Optional fixed-size temporary buffer used by the gemv selectors below.
// It only holds storage when Cond is true and MaxSize is not Dynamic;
// in every other case data() returns a null pointer.
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
// Cond == false: no temporary is expected to be needed, so data() asserts (internal assertion) and returns null.
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};
// Cond == true but the maximal size is Dynamic: no static storage is possible, data() returns null.
template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
EIGEN_STRONG_INLINE Scalar* data() { return 0; }
};
// Cond == true with a compile-time bound on the size: the buffer is a member array.
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
#if EIGEN_ALIGN_STATICALLY
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
enum {
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
PacketSize = internal::packet_traits<Scalar>::size
};
// When ForceAlignment is set, PacketSize extra elements are reserved as slack for the manual alignment.
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
// Rounds the array address down to a multiple of 16 and then adds 16, i.e. returns the
// first 16-byte boundary strictly above the start of the array.
// NOTE(review): this relies on PacketSize*sizeof(Scalar) being at least 16 bytes - confirm.
return ForceAlignment
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
: m_data.array;
}
#endif
};
// Matrix * vector with a column-major matrix offering direct access:
// the work is forwarded to general_matrix_vector_product.
// The kernel is always called with a destination increment of 1, so a temporary
// destination is used whenever dest cannot be written to directly (see evalToDest).
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
template<typename ProductType, typename Dest>
static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::RealScalar RealScalar;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
// Extract the underlying operands, and fold their scalar factors into alpha.
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
};
// Static temporary, only holding storage if dest might not be usable directly.
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
// For a complex matrix times a real vector, alpha is passed to the kernel as an RhsScalar,
// which is only possible without loss if its imaginary part is zero.
bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
// Declares actualDestPtr from the supplied pointer (dest itself or the static temporary).
// NOTE(review): presumably the macro provides a temporary buffer of dest.size() elements
// when the supplied pointer is null - confirm against the macro definition.
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
evalToDest ? dest.data() : static_dest.data());
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
{
// Compute the plain product into a zeroed temporary; actualAlpha is applied afterwards.
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
// The kernel accumulates into its destination, so start from the current content of dest.
MappedDest(actualDestPtr, dest.size()) = dest;
}
general_matrix_vector_product
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
actualLhs.data(), actualLhs.outerStride(),
actualRhs.data(), actualRhs.innerStride(),
actualDestPtr, 1,
compatibleAlpha);
// Transfer the temporary result back into dest.
if (!evalToDest)
{
if(!alphaIsCompatible)
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
}
}
};
// Matrix * vector with a row-major matrix offering direct access:
// the work is forwarded to general_matrix_vector_product.
// The kernel is always called with a rhs increment of 1, so the rhs is first copied
// to a temporary when its inner stride is not 1 at compile time.
template<> struct gemv_selector<OnTheRight,RowMajor,true>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::Index Index;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::_ActualRhsType _ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
// Extract the underlying operands, and fold their scalar factors into alpha.
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
};
// Static temporary, only holding storage if the rhs cannot be used directly.
gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
// Declares actualRhsPtr from the supplied pointer (the rhs data or the static temporary).
// NOTE(review): presumably the macro provides a temporary buffer of actualRhs.size() elements
// when the supplied pointer is null - confirm against the macro definition.
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
if(!DirectlyUseRhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
// Pack the rhs coefficients contiguously into the temporary.
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
// Unlike the column-major case, dest is passed with its own inner stride.
general_matrix_vector_product
<Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
actualLhs.data(), actualLhs.outerStride(),
actualRhsPtr, 1,
dest.data(), dest.innerStride(),
actualAlpha);
}
};
// Matrix * vector fallback for a column-major matrix without usable direct access:
// dest is accumulated as a linear combination of the columns of the lhs,
// dest += (alpha * rhs(c)) * lhs.col(c).
template<> struct gemv_selector<OnTheRight,ColMajor,false>
{
  template<typename ProductType, typename Dest>
  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename Dest::Index Index;
    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
    const Index depth = prod.rhs().rows();
    for (Index c = 0; c < depth; ++c)
    {
      dest += (alpha * prod.rhs().coeff(c)) * prod.lhs().col(c);
    }
  }
};
// Matrix * vector fallback for a row-major matrix without usable direct access:
// each destination coefficient is updated with alpha times the sum of the
// coefficient-wise product of one row of the lhs with the transposed rhs.
template<> struct gemv_selector<OnTheRight,RowMajor,false>
{
  template<typename ProductType, typename Dest>
  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename Dest::Index Index;
    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
    const Index rowCount = prod.rows();
    for (Index r = 0; r < rowCount; ++r)
    {
      dest.coeffRef(r) += alpha * (prod.lhs().row(r).cwiseProduct(prod.rhs().transpose())).sum();
    }
  }
};
} // end namespace internal
/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/
/** \returns the matrix product of \c *this and \a other.
*
* The kind of product expression returned is selected at compile time through
* ProductReturnType. Invalid size combinations known at compile time are
* rejected with a static assertion.
*
* \note If instead of the matrix product you want the coefficient-wise product, see cwiseProduct().
*
* \sa lazyProduct(), operator*=(const MatrixBase&), cwiseProduct()
*/
template<typename Derived>
template<typename OtherDerived>
inline const typename ProductReturnType<Derived, OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
// A note regarding the function declaration: In MSVC, this function will sometimes
// not be inlined since DenseStorage is an unwindable object for dynamic
// matrices and product types are holding a member to store the result.
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
enum {
// The inner dimensions must agree unless at least one of them is Dynamic.
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|| OtherDerived::RowsAtCompileTime==Dynamic
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
// The two flags below only serve to pick the most helpful error message.
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
};
// note to the lost user:
// * for a dot product use: v1.dot(v2)
// * for a coeff-wise product use: v1.cwiseProduct(v2)
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
#ifdef EIGEN_DEBUG_PRODUCT
internal::product_type<Derived,OtherDerived>::debug();
#endif
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
  *
  * The returned product behaves like any other expression: its coefficients are
  * computed one at a time, as they are requested. This might be useful in some
  * extremely rare cases where only a small, non-coherent fraction of the result's
  * coefficients has to be computed.
  *
  * \warning This version of the matrix product can be much, much slower. Use it only if
  *          you know what you are doing and have measured a true speed improvement.
  *
  * \sa operator*(const MatrixBase&)
  */
template<typename Derived>
template<typename OtherDerived>
const typename LazyProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
  typedef typename LazyProductReturnType<Derived,OtherDerived>::Type LazyResultType;

  enum {
    // The inner dimensions are compatible when either of them is Dynamic
    // or when both compile-time values are equal.
    InnerDimsMatch = Derived::ColsAtCompileTime==Dynamic
                  || OtherDerived::RowsAtCompileTime==Dynamic
                  || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
    BothAreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
    SizesAreEqual  = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
  };

  // Hints for the user who ends up here because of a failed assertion:
  //  * dot product:             v1.dot(v2)
  //  * coefficient-wise product: v1.cwiseProduct(v2)
  //
  // Two vectors of identical size whose inner dimensions do not match.
  EIGEN_STATIC_ASSERT(InnerDimsMatch || !BothAreVectors || !SizesAreEqual,
    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
  // Two non-vector operands of identical size whose inner dimensions do not match.
  EIGEN_STATIC_ASSERT(InnerDimsMatch || BothAreVectors || !SizesAreEqual,
    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  // Any remaining mismatch: sizes differ and the inner dimensions do not match.
  EIGEN_STATIC_ASSERT(InnerDimsMatch || SizesAreEqual, INVALID_MATRIX_PRODUCT)

  return LazyResultType(derived(), other.derived());
}
#endif // EIGEN_PRODUCT_H

View File

@@ -134,12 +134,12 @@ pdiv(const Packet& a,
/** \internal \returns the min of \a a and \a b (coeff-wise) */
template<typename Packet> inline Packet
pmin(const Packet& a,
const Packet& b) { using std::min; return (min)(a, b); }
const Packet& b) { return std::min(a, b); }
/** \internal \returns the max of \a a and \a b (coeff-wise) */
template<typename Packet> inline Packet
pmax(const Packet& a,
const Packet& b) { using std::max; return (max)(a, b); }
const Packet& b) { return std::max(a, b); }
/** \internal \returns the absolute value of \a a */
template<typename Packet> inline Packet
@@ -286,7 +286,7 @@ pmadd(const Packet& a,
{ return padd(pmul(a, b),c); }
/** \internal \returns a packet version of \a *from.
* If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
* \If LoadMode equals Aligned, \a from must be 16 bytes aligned */
template<typename Packet, int LoadMode>
inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
{
@@ -297,7 +297,7 @@ inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
}
/** \internal copy the packet \a from to \a *to.
* If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
* If StoreMode equals Aligned, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet, int LoadMode>
inline void pstoret(Scalar* to, const Packet& from)
{
@@ -312,7 +312,7 @@ template<int Offset,typename PacketType>
struct palign_impl
{
// by default data are aligned, so there is nothing to be done :)
static inline void run(PacketType&, const PacketType&) {}
inline static void run(PacketType&, const PacketType&) {}
};
/** \internal update \a first using the concatenation of the \a Offset last elements

View File

@@ -141,8 +141,7 @@ struct significant_decimals_default_impl
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline int run()
{
using std::ceil;
return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
return cast<RealScalar,int>(std::ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
}
};
@@ -171,7 +170,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
return s;
}
typename Derived::Nested m = _m;
const typename Derived::Nested m = _m;
typedef typename Derived::Scalar Scalar;
typedef typename Derived::Index Index;

View File

@@ -31,10 +31,10 @@
*
* \brief A matrix or vector expression mapping an existing array of data.
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
* \param PlainObjectType the equivalent matrix type of the mapped data
* \param MapOptions specifies whether the pointer is \c Aligned, or \c Unaligned.
* The default is \c Unaligned.
* \param StrideType optionnally specifies strides. By default, Map assumes the memory layout
* of an ordinary, contiguous array. This can be overridden by specifying strides.
* The type passed here must be a specialization of the Stride template, see examples below.
*
@@ -72,9 +72,9 @@
* Example: \include Map_placement_new.cpp
* Output: \verbinclude Map_placement_new.out
*
* This class is the return type of PlainObjectBase::Map() but can also be used directly.
* This class is the return type of Matrix::Map() but can also be used directly.
*
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
* \sa Matrix::Map(), \ref TopicStorageOrders
*/
namespace internal {

View File

@@ -170,8 +170,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
&& "data is not aligned");
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % (sizeof(Scalar)*internal::packet_traits<Scalar>::size)) == 0)
&& "data is not aligned");
}
PointerType m_data;
@@ -238,7 +238,7 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
(this->m_data + index * innerStride(), x);
}
explicit inline MapBase(PointerType data) : Base(data) {}
inline MapBase(PointerType data) : Base(data) {}
inline MapBase(PointerType data, Index size) : Base(data, size) {}
inline MapBase(PointerType data, Index rows, Index cols) : Base(data, rows, cols) {}

View File

@@ -87,8 +87,7 @@ struct real_impl<std::complex<RealScalar> >
{
static inline RealScalar run(const std::complex<RealScalar>& x)
{
using std::real;
return real(x);
return std::real(x);
}
};
@@ -123,8 +122,7 @@ struct imag_impl<std::complex<RealScalar> >
{
static inline RealScalar run(const std::complex<RealScalar>& x)
{
using std::imag;
return imag(x);
return std::imag(x);
}
};
@@ -246,8 +244,7 @@ struct conj_impl<std::complex<RealScalar> >
{
static inline std::complex<RealScalar> run(const std::complex<RealScalar>& x)
{
using std::conj;
return conj(x);
return std::conj(x);
}
};
@@ -273,8 +270,7 @@ struct abs_impl
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x)
{
using std::abs;
return abs(x);
return std::abs(x);
}
};
@@ -309,7 +305,7 @@ struct abs2_impl<std::complex<RealScalar> >
{
static inline RealScalar run(const std::complex<RealScalar>& x)
{
return real(x)*real(x) + imag(x)*imag(x);
return std::norm(x);
}
};
@@ -373,12 +369,10 @@ struct hypot_impl
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x, const Scalar& y)
{
using std::max;
using std::min;
RealScalar _x = abs(x);
RealScalar _y = abs(y);
RealScalar p = (max)(_x, _y);
RealScalar q = (min)(_x, _y);
RealScalar p = std::max(_x, _y);
RealScalar q = std::min(_x, _y);
RealScalar qp = q/p;
return p * sqrt(RealScalar(1) + qp*qp);
}
@@ -426,8 +420,7 @@ struct sqrt_default_impl
{
static inline Scalar run(const Scalar& x)
{
using std::sqrt;
return sqrt(x);
return std::sqrt(x);
}
};
@@ -467,7 +460,7 @@ inline EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x)
// This macro instantiates all the necessary template machinery that is common to all unary real functions.
#define EIGEN_MATHFUNC_STANDARD_REAL_UNARY(NAME) \
template<typename Scalar, bool IsInteger> struct NAME##_default_impl { \
static inline Scalar run(const Scalar& x) { using std::NAME; return NAME(x); } \
static inline Scalar run(const Scalar& x) { return std::NAME(x); } \
}; \
template<typename Scalar> struct NAME##_default_impl<Scalar, true> { \
static inline Scalar run(const Scalar&) { \
@@ -502,8 +495,7 @@ struct atan2_default_impl
typedef Scalar retval;
static inline Scalar run(const Scalar& x, const Scalar& y)
{
using std::atan2;
return atan2(x, y);
return std::atan2(x, y);
}
};
@@ -542,8 +534,7 @@ struct pow_default_impl
typedef Scalar retval;
static inline Scalar run(const Scalar& x, const Scalar& y)
{
using std::pow;
return pow(x, y);
return std::pow(x, y);
}
};
@@ -552,7 +543,7 @@ struct pow_default_impl<Scalar, true>
{
static inline Scalar run(Scalar x, Scalar y)
{
Scalar res(1);
Scalar res = 1;
eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
if(y & 1) res *= x;
y >>= 1;
@@ -735,8 +726,7 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
}
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
using std::min;
return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
return abs(x - y) <= std::min(abs(x), abs(y)) * prec;
}
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
@@ -774,8 +764,7 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
}
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
using std::min;
return abs2(x - y) <= (min)(abs2(x), abs2(y)) * prec * prec;
return abs2(x - y) <= std::min(abs2(x), abs2(y)) * prec * prec;
}
};
@@ -837,17 +826,6 @@ template<> struct scalar_fuzzy_impl<bool>
};
/****************************************************************************
* Special functions *
****************************************************************************/
// std::isfinite is non standard, so let's define our own version,
// even though it is not very efficient.
template<typename T> bool isfinite(const T& x)
{
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
}
} // end namespace internal
#endif // EIGEN_MATHFUNCTIONS_H

View File

@@ -43,8 +43,8 @@
* \tparam _Cols Number of columns, or \b Dynamic
*
* The remaining template parameters are optional -- in most cases you don't have to worry about them.
* \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
* \b #AutoAlign or \b #DontAlign.
* \tparam _Options \anchor matrix_tparam_options A combination of either \b RowMajor or \b ColMajor, and of either
* \b AutoAlign or \b DontAlign.
* The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
* for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
* \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
@@ -153,6 +153,10 @@ class Matrix
typedef typename Base::PlainObject PlainObject;
enum { NeedsToAlign = (!(Options&DontAlign))
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
using Base::base;
using Base::coeffRef;
@@ -411,6 +415,25 @@ EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
#undef EIGEN_MAKE_TYPEDEFS
#undef EIGEN_MAKE_FIXED_TYPEDEFS
#undef EIGEN_MAKE_TYPEDEFS_LARGE
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
using Eigen::Vector##SizeSuffix##TypeSuffix; \
using Eigen::RowVector##SizeSuffix##TypeSuffix;
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
#define EIGEN_USING_MATRIX_TYPEDEFS \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
#endif // EIGEN_MATRIX_H

View File

@@ -111,7 +111,7 @@ template<typename Derived> class MatrixBase
/** \returns the size of the main diagonal, which is min(rows(),cols()).
* \sa rows(), cols(), SizeAtCompileTime. */
inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
inline Index diagonalSize() const { return std::min(rows(),cols()); }
/** \brief The plain matrix type corresponding to this expression.
*
@@ -330,7 +330,7 @@ template<typename Derived> class MatrixBase
/** \returns an \link ArrayBase Array \endlink expression of this matrix
* \sa ArrayBase::matrix() */
ArrayWrapper<Derived> array() { return derived(); }
const ArrayWrapper<const Derived> array() const { return derived(); }
const ArrayWrapper<Derived> array() const { return derived(); }
/////////// LU module ///////////
@@ -465,8 +465,6 @@ template<typename Derived> class MatrixBase
const MatrixFunctionReturnValue<Derived> sinh() const;
const MatrixFunctionReturnValue<Derived> cos() const;
const MatrixFunctionReturnValue<Derived> sin() const;
const MatrixSquareRootReturnValue<Derived> sqrt() const;
const MatrixLogarithmReturnValue<Derived> log() const;
#ifdef EIGEN2_SUPPORT
template<typename ProductDerived, typename Lhs, typename Rhs>
@@ -513,10 +511,10 @@ template<typename Derived> class MatrixBase
protected:
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
};
#endif // EIGEN_MATRIXBASE_H

View File

@@ -81,14 +81,14 @@ template<typename T> struct GenericNumTraits
>::type NonInteger;
typedef T Nested;
static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
static inline Real dummy_precision()
inline static Real epsilon() { return std::numeric_limits<T>::epsilon(); }
inline static Real dummy_precision()
{
// make sure to override this for floating-point types
return Real(0);
}
static inline T highest() { return (std::numeric_limits<T>::max)(); }
static inline T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
inline static T highest() { return std::numeric_limits<T>::max(); }
inline static T lowest() { return IsInteger ? std::numeric_limits<T>::min() : (-std::numeric_limits<T>::max()); }
#ifdef EIGEN2_SUPPORT
enum {
@@ -104,12 +104,12 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
template<> struct NumTraits<float>
: GenericNumTraits<float>
{
static inline float dummy_precision() { return 1e-5f; }
inline static float dummy_precision() { return 1e-5f; }
};
template<> struct NumTraits<double> : GenericNumTraits<double>
{
static inline double dummy_precision() { return 1e-12; }
inline static double dummy_precision() { return 1e-12; }
};
template<> struct NumTraits<long double>
@@ -130,8 +130,8 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
};
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
inline static Real epsilon() { return NumTraits<Real>::epsilon(); }
inline static Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
};
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>

View File

@@ -511,7 +511,7 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
protected:
typename IndicesType::Nested m_indices;
const typename IndicesType::Nested m_indices;
};
/** \returns the matrix with the permutation applied to the columns.
@@ -608,7 +608,7 @@ struct permut_matrix_product_retval
protected:
const PermutationType& m_permutation;
typename MatrixType::Nested m_matrix;
const typename MatrixType::Nested m_matrix;
};
/* Template partial specialization for transposed/inverse permutations */

View File

@@ -34,26 +34,13 @@
namespace internal {
template<typename Index>
EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
{
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
// we assume Index is signed
Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
bool error = (rows < 0 || cols < 0) ? true
: (rows == 0 || cols == 0) ? false
: (rows > max_index / cols);
if (error)
throw_std_bad_alloc();
}
template <typename Derived, typename OtherDerived = Derived, bool IsVector = static_cast<bool>(Derived::IsVectorAtCompileTime)> struct conservative_resize_like_impl;
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
} // end namespace internal
/** \class PlainObjectBase
/**
* \brief %Dense storage base class for matrices and arrays.
*
* This class can be extended with the help of the plugin mechanism described on the page
@@ -61,29 +48,8 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
*
* \sa \ref TopicClassHierarchy
*/
#ifdef EIGEN_PARSED_BY_DOXYGEN
namespace internal {
// this is a workaround for doxygen not being able to understand the inheritance logic
// when it is hidden by the dense_xpr_base helper struct.
template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
/** This class is just a workaround for Doxygen and it does not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct dense_xpr_base_dispatcher_for_doxygen<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
/** This class is just a workaround for Doxygen and it does not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct dense_xpr_base_dispatcher_for_doxygen<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
} // namespace internal
template<typename Derived>
class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen<Derived>
#else
template<typename Derived>
class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
#endif
{
public:
enum { Options = internal::traits<Derived>::Options };
@@ -118,12 +84,14 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
protected:
DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
public:
enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
enum { NeedsToAlign = (!(Options&DontAlign))
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
Base& base() { return *static_cast<Base*>(this); }
@@ -232,13 +200,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
{
#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
internal::check_rows_cols_for_overflow(rows, cols);
Index size = rows*cols;
bool size_changed = size != this->size();
m_storage.resize(size, rows, cols);
if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
#else
internal::check_rows_cols_for_overflow(rows, cols);
m_storage.resize(rows*cols, rows, cols);
#endif
}
@@ -307,7 +273,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
{
const OtherDerived& other = _other.derived();
internal::check_rows_cols_for_overflow(other.rows(), other.cols());
const Index othersize = other.rows()*other.cols();
if(RowsAtCompileTime == 1)
{
@@ -452,7 +417,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
: m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
{
_check_template_params();
internal::check_rows_cols_for_overflow(other.derived().rows(), other.derived().cols());
Base::operator=(other.derived());
}
@@ -461,71 +425,74 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
* \a data pointers.
*
* These methods do not allow to specify strides. If you need to specify strides, you have to
* use the Map class directly.
*
* \see class Map
*/
//@{
static inline ConstMapType Map(const Scalar* data)
inline static ConstMapType Map(const Scalar* data)
{ return ConstMapType(data); }
static inline MapType Map(Scalar* data)
inline static MapType Map(Scalar* data)
{ return MapType(data); }
static inline ConstMapType Map(const Scalar* data, Index size)
inline static ConstMapType Map(const Scalar* data, Index size)
{ return ConstMapType(data, size); }
static inline MapType Map(Scalar* data, Index size)
inline static MapType Map(Scalar* data, Index size)
{ return MapType(data, size); }
static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)
inline static ConstMapType Map(const Scalar* data, Index rows, Index cols)
{ return ConstMapType(data, rows, cols); }
static inline MapType Map(Scalar* data, Index rows, Index cols)
inline static MapType Map(Scalar* data, Index rows, Index cols)
{ return MapType(data, rows, cols); }
static inline ConstAlignedMapType MapAligned(const Scalar* data)
inline static ConstAlignedMapType MapAligned(const Scalar* data)
{ return ConstAlignedMapType(data); }
static inline AlignedMapType MapAligned(Scalar* data)
inline static AlignedMapType MapAligned(Scalar* data)
{ return AlignedMapType(data); }
static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)
inline static ConstAlignedMapType MapAligned(const Scalar* data, Index size)
{ return ConstAlignedMapType(data, size); }
static inline AlignedMapType MapAligned(Scalar* data, Index size)
inline static AlignedMapType MapAligned(Scalar* data, Index size)
{ return AlignedMapType(data, size); }
static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
inline static ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
{ return ConstAlignedMapType(data, rows, cols); }
static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
inline static AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
{ return AlignedMapType(data, rows, cols); }
template<int Outer, int Inner>
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
//@}
@@ -615,12 +582,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<typename T0, typename T1>
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
{
EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
bool(NumTraits<T1>::IsInteger),
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
internal::check_rows_cols_for_overflow(rows, cols);
m_storage.resize(rows*cols,rows,cols);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
}
@@ -647,7 +610,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
static EIGEN_STRONG_INLINE void _check_template_params()
EIGEN_STRONG_INLINE static void _check_template_params()
{
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
@@ -678,15 +641,14 @@ struct internal::conservative_resize_like_impl
if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
(!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns
{
internal::check_rows_cols_for_overflow(rows, cols);
_this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
}
else
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(rows,cols);
const Index common_rows = (std::min)(rows, _this.rows());
const Index common_cols = (std::min)(cols, _this.cols());
const Index common_rows = std::min(rows, _this.rows());
const Index common_cols = std::min(cols, _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}
@@ -719,8 +681,8 @@ struct internal::conservative_resize_like_impl
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(other);
const Index common_rows = (std::min)(tmp.rows(), _this.rows());
const Index common_cols = (std::min)(tmp.cols(), _this.cols());
const Index common_rows = std::min(tmp.rows(), _this.rows());
const Index common_cols = std::min(tmp.cols(), _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}

View File

@@ -1,7 +1,8 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -25,89 +26,603 @@
#ifndef EIGEN_PRODUCT_H
#define EIGEN_PRODUCT_H
template<typename Lhs, typename Rhs> class Product;
template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
/** \class Product
/** \class GeneralProduct
* \ingroup Core_Module
*
* \brief Expression of the product of two arbitrary matrices or vectors
* \brief Expression of the product of two general matrices or vectors
*
* \param Lhs the type of the left-hand side expression
* \param Rhs the type of the right-hand side expression
* \param LhsNested the type used to store the left-hand side
* \param RhsNested the type used to store the right-hand side
* \param ProductMode the type of the product
*
* This class represents an expression of the product of two arbitrary matrices.
* This class represents an expression of the product of two general matrices.
* We call a general matrix a dense matrix with full storage. For instance,
* this excludes triangular, selfadjoint, and sparse matrices.
* It is the return type of the operator* between general matrices. Its template
* arguments are determined automatically by ProductReturnType. Therefore,
* GeneralProduct should never be used directly. To determine the result type of a
* function which involves a matrix product, use ProductReturnType::Type.
*
* \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
class GeneralProduct;
// Size categories produced by internal::product_size_category. A dimension is
// classified as 1, Small or Large; the two named categories use the values 2 and 3
// so that they stay distinct from the literal category 1.
enum { Large = 2, Small = 3 };
namespace internal {
template<typename Lhs, typename Rhs>
struct traits<Product<Lhs, Rhs> >
template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
{
typedef MatrixXpr XprKind;
typedef typename remove_all<Lhs>::type LhsCleaned;
typedef typename remove_all<Rhs>::type RhsCleaned;
typedef typename scalar_product_traits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind,
typename traits<RhsCleaned>::StorageKind>::ret StorageKind;
typedef typename promote_index_type<typename traits<LhsCleaned>::Index,
typename traits<RhsCleaned>::Index>::type Index;
enum {
RowsAtCompileTime = LhsCleaned::RowsAtCompileTime,
ColsAtCompileTime = RhsCleaned::ColsAtCompileTime,
MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime,
MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime,
Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0), // TODO should be no storage order
CoeffReadCost = 0 // TODO CoeffReadCost should not be part of the expression traits
enum { is_large = MaxSize == Dynamic ||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
value = is_large ? Large
: Size == 1 ? 1
: Small
};
};
// Compile-time classification of the product Lhs * Rhs.
//
// The three dimensions of the product (rows of Lhs, cols of Rhs, and the shared
// depth) are each mapped to a size category by product_size_category, and the
// resulting triple is handed to product_type_selector. The selected product kind
// is exposed as the enumerator `value`.
template<typename Lhs, typename Rhs> struct product_type
{
// Operand types with references/const qualifiers stripped by remove_all.
typedef typename remove_all<Lhs>::type _Lhs;
typedef typename remove_all<Rhs>::type _Rhs;
enum {
// Rows of the result come from the left operand, columns from the right one.
MaxRows = _Lhs::MaxRowsAtCompileTime,
Rows = _Lhs::RowsAtCompileTime,
MaxCols = _Rhs::MaxColsAtCompileTime,
Cols = _Rhs::ColsAtCompileTime,
// The depth is described by both Lhs columns and Rhs rows; the two values are
// combined with EIGEN_SIZE_MIN_PREFER_FIXED.
// NOTE(review): presumably this picks the fixed size when only one side is Dynamic - confirm against the macro definition.
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
_Rhs::MaxRowsAtCompileTime),
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
_Rhs::RowsAtCompileTime),
// Re-exported threshold; not used by the other members of this struct.
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
};
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
// is to work around an internal compiler error with gcc 4.1 and 4.2.
private:
enum {
// Size category (1, Small or Large) of each of the three dimensions.
rows_select = product_size_category<Rows,MaxRows>::value,
cols_select = product_size_category<Cols,MaxCols>::value,
depth_select = product_size_category<Depth,MaxDepth>::value
};
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
public:
enum {
// The product kind chosen by product_type_selector for this category triple.
value = selector::ret
};
#ifdef EIGEN_DEBUG_PRODUCT
// Dumps the sizes, their categories and the selected product kind through
// EIGEN_DEBUG_VAR. Only compiled when EIGEN_DEBUG_PRODUCT is defined.
static void debug()
{
EIGEN_DEBUG_VAR(Rows);
EIGEN_DEBUG_VAR(Cols);
EIGEN_DEBUG_VAR(Depth);
EIGEN_DEBUG_VAR(rows_select);
EIGEN_DEBUG_VAR(cols_select);
EIGEN_DEBUG_VAR(depth_select);
EIGEN_DEBUG_VAR(value);
}
#endif
};
/* Compile-time selection of the kind of product from the three dimensions of the
 * product (rows, cols, depth), each one being classified as 1, Small or Large.
 * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
// FIXME I'm not sure the current mapping is the ideal one.

// --- generic rules (partial specializations) ---
// depth == 1: outer product, unless one of the explicit entries below applies
template<int M, int N>
struct product_type_selector<M, N, 1>         { enum { ret = OuterProduct }; };
// 1 x 1 result: inner product
template<int Depth>
struct product_type_selector<1, 1, Depth>     { enum { ret = InnerProduct }; };
// resolves the overlap between the two rules above
template<>
struct product_type_selector<1, 1, 1>         { enum { ret = InnerProduct }; };

// --- lazy coefficient based products ---
template<> struct product_type_selector<Small, Small, 1>     { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Large, 1>     { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Large, Small, 1>     { enum { ret = LazyCoeffBasedProductMode }; };

// --- coefficient based products ---
template<> struct product_type_selector<Small, 1,     Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1,     Small, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Small, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1,     Large, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1,     Small, Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large, 1,     Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small, 1,     Large> { enum { ret = CoeffBasedProductMode }; };

// --- matrix * vector kernels ---
template<> struct product_type_selector<1,     Large, Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<Large, 1,     Large> { enum { ret = GemvProduct }; };

// --- matrix * matrix kernels ---
template<> struct product_type_selector<Small, Small, Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large, Small, Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small, Large, Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large, Large, Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large, Small, Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small, Large, Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large, Large, Small> { enum { ret = GemmProduct }; };
} // end namespace internal
/** \class ProductReturnType
* \ingroup Core_Module
*
* \brief Helper class to get the correct and optimized returned type of operator*
*
* \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side
* \param ProductType the kind of product (see internal::product_type, which computes it from Lhs and Rhs)
*
* This class defines the typename Type representing the optimized product expression
* between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
* is the recommended way to define the result type of a function returning an expression
* which involves a matrix product. The product expression classes themselves (e.g.
* GeneralProduct) should never be used directly.
*
* This primary template maps to GeneralProduct<Lhs,Rhs,ProductType>; specializations
* for other product kinds may select a different expression class.
*
* \sa class GeneralProduct, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType>
struct ProductReturnType
{
// TODO use the nested type to reduce instantiations ????
// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
// The operands are currently forwarded as-is (not through internal::nested, see TODO above).
typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
};
template<typename Lhs, typename Rhs>
class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
typename internal::traits<Rhs>::StorageKind>::ret>
struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
{
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
};
// Return type helper for lazy coefficient based products.
// Both operands are first passed through internal::nested (with the other operand's
// relevant dimension as the evaluation count hint and the operand's plain matrix type
// as the evaluation type); the resulting expression is a CoeffBasedProduct carrying
// only the NestByRefBit option.
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs, Rhs, LazyCoeffBasedProductMode>
{
  typedef typename internal::nested<
            Lhs,
            Rhs::ColsAtCompileTime,
            typename internal::plain_matrix_type<Lhs>::type
          >::type LhsNested;

  typedef typename internal::nested<
            Rhs,
            Lhs::RowsAtCompileTime,
            typename internal::plain_matrix_type<Rhs>::type
          >::type RhsNested;

  typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
};
// Named shorthand for ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>.
// It only exists as a workaround for sun CC.
template<typename Lhs, typename Rhs>
struct LazyProductReturnType
  : public ProductReturnType<Lhs, Rhs, LazyCoeffBasedProductMode>
{
};
/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/
// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
// product ends up to a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
namespace internal {

// The inner product expression has the traits of a 1x1 Matrix whose scalar type is
// the result type of multiplying an Lhs scalar by an Rhs scalar.
template<typename Lhs, typename Rhs>
struct traits< GeneralProduct<Lhs, Rhs, InnerProduct> >
  : traits<
      Matrix<
        typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,
        1, 1>
    >
{
};

}
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, InnerProduct>
: internal::no_assignment_operator,
public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
{
typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
public:
typedef typename ProductImpl<
Lhs, Rhs,
typename internal::promote_storage_type<typename Lhs::StorageKind,
typename Rhs::StorageKind>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
typedef typename Lhs::Nested LhsNested;
typedef typename Rhs::Nested RhsNested;
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
GeneralProduct(const Lhs& lhs, const Rhs& rhs)
{
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
}
inline Index rows() const { return m_lhs.rows(); }
inline Index cols() const { return m_rhs.cols(); }
const LhsNestedCleaned& lhs() const { return m_lhs; }
const RhsNestedCleaned& rhs() const { return m_rhs; }
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
/** Conversion to scalar */
operator const typename Base::Scalar() const {
return Base::coeff(0,0);
}
};
/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/
namespace internal {
template<int StorageOrder> struct outer_product_selector;
template<typename Lhs, typename Rhs>
class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
{
typedef Product<Lhs, Rhs> Derived;
public:
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
{};
typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
}
// Outer (column-vector times row-vector) product expression.
// The actual work is delegated to internal::outer_product_selector, chosen according
// to the storage order of the destination.
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
  : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
{
  public:
    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)

    // Stores both operands in the base class; rejects operands whose real scalar
    // types differ at compile time.
    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
    {
      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    }

    // Performs dest += alpha * lhs * rhs.
    template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
    {
      // Pick the row-wise or column-wise update loop from the destination's flags.
      enum { DestStorageOrder = (int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor };
      internal::outer_product_selector<DestStorageOrder>::run(*this, dest, alpha);
    }
};
namespace internal {
// Outer product accumulation for a column-major destination:
// each destination column j receives (alpha * rhs(j)) * lhs.
template<>
struct outer_product_selector<ColMajor>
{
  template<typename ProductType, typename Dest>
  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename Dest::Index Index;
    // FIXME make sure lhs is sequentially stored
    // FIXME not very good if rhs is real and lhs complex while alpha is real too
    const Index nbCols = dest.cols();
    for (Index col = 0; col < nbCols; ++col)
    {
      dest.col(col) += (alpha * prod.rhs().coeff(col)) * prod.lhs();
    }
  }
};
// Outer product accumulation for a row-major destination:
// each destination row i receives (alpha * lhs(i)) * rhs.
template<>
struct outer_product_selector<RowMajor>
{
  template<typename ProductType, typename Dest>
  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename Dest::Index Index;
    // FIXME make sure rhs is sequentially stored
    // FIXME not very good if lhs is real and rhs complex while alpha is real too
    const Index nbRows = dest.rows();
    for (Index row = 0; row < nbRows; ++row)
    {
      dest.row(row) += (alpha * prod.lhs().coeff(row)) * prod.rhs();
    }
  }
};
} // end namespace internal
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
/* According to the shape/flags of the matrix we have to distinguish 3 different cases:
* 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
* 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
* 3 - all other cases are handled using a simple loop along the outer-storage direction.
* Therefore we need a lower level meta selector.
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
*/
namespace internal {

// The matrix-vector product expression inherits the traits of its ProductBase.
template<typename Lhs, typename Rhs>
struct traits< GeneralProduct<Lhs, Rhs, GemvProduct> >
  : traits< ProductBase<GeneralProduct<Lhs, Rhs, GemvProduct>, Lhs, Rhs> >
{
};

// Lower level selector of the matrix-vector kernel; it is specialized on the side
// of the vector, the storage order of the matrix, and whether the matrix is
// BLAS compatible.
template<int Side, int StorageOrder, bool BlasCompatible>
struct gemv_selector;

} // end namespace internal
// Matrix-vector (or vector-matrix) product expression.
// scaleAndAddTo() forwards to internal::gemv_selector, which is chosen from the side
// of the vector operand, the storage order of the matrix operand and whether that
// matrix offers usable direct access.
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemvProduct>
  : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
{
  public:
    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)

    typedef typename Lhs::Scalar LhsScalar;
    typedef typename Rhs::Scalar RhsScalar;

    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
    {
      // The scalar type check is currently disabled:
//       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
//         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    }

    // OnTheLeft when the left operand is the vector, OnTheRight otherwise.
    enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };

    // The operand playing the role of the matrix.
    typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;

    // Performs dst += alpha * lhs * rhs.
    template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
    {
      eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());

      enum {
        MatrixStorageOrder = (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor
      };
      typedef internal::gemv_selector<
                Side,
                MatrixStorageOrder,
                bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
              > Selector;

      Selector::run(*this, dst, alpha);
    }
};
namespace internal {
// The vector is on the left => transposition
template<int StorageOrder, bool BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
(prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
}
};
// Conditionally present scratch vector used by the gemv selectors.
// data() returns a pointer to a statically sized buffer when one is available, and
// a null pointer otherwise (the callers then allocate a buffer themselves).
template<typename Scalar,int Size,int MaxSize,bool Cond>
struct gemv_static_vector_if;

// Cond == false: no buffer at all; data() is not expected to be called.
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
  EIGEN_STRONG_INLINE Scalar* data()
  {
    eigen_internal_assert(false && "should never be called");
    return 0;
  }
};

// Cond == true but no compile-time bound on the size: no static buffer, report null.
template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
  EIGEN_STRONG_INLINE Scalar* data()
  {
    return 0;
  }
};

// Cond == true with a compile-time bound: embed a plain_array sized from Size/MaxSize.
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;

  EIGEN_STRONG_INLINE Scalar* data()
  {
    return m_data.array;
  }
};
// Matrix * vector with the vector on the right and a column-major matrix, for the
// case where the third selector argument (BlasCompatible) is true.
//
// run() performs dest += alpha * lhs * rhs by calling general_matrix_vector_product.
// The kernel is always given a result pointer with unit stride, so when `dest` cannot
// be used in place (non-unit inner stride, or a complex-by-real product with a
// non-real alpha) the result goes through a temporary buffer.
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
template<typename ProductType, typename Dest>
static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::RealScalar RealScalar;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
// View of a raw ResScalar buffer as a dynamic column vector (used for the temporary).
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
// Operands as returned by the blas traits' extract().
const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
// Fold the scalar factors reported by the blas traits for both operands into alpha.
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
// True when the lhs scalar is complex and the rhs scalar is not.
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
// True when a temporary may be needed, i.e. when static storage for it is worth having.
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
};
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
// alpha can be handed to the kernel unless this is a complex-by-real product and
// alpha has a non-zero imaginary part.
bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
// NOTE(review): get_factor presumably converts actualAlpha to RhsScalar (e.g. taking its real part) - confirm against its definition.
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
ResScalar* actualDestPtr;
bool freeDestPtr = false;
if (evalToDest)
{
// Accumulate directly into the destination.
actualDestPtr = &dest.coeffRef(0);
}
else
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
// Prefer the static buffer; a null pointer means none is available and one must be allocated.
if((actualDestPtr = static_dest.data())==0)
{
freeDestPtr = true;
actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
}
if(!alphaIsCompatible)
{
// Compute the unscaled product into a zeroed temporary; the scaling by
// actualAlpha is applied afterwards when accumulating into dest.
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
// Start from the current content of dest so the kernel accumulates onto it.
MappedDest(actualDestPtr, dest.size()) = dest;
}
general_matrix_vector_product
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
&actualLhs.coeffRef(0,0), actualLhs.outerStride(),
actualRhs.data(), actualRhs.innerStride(),
actualDestPtr, 1,
compatibleAlpha);
if (!evalToDest)
{
// Move the result from the temporary back into dest.
if(!alphaIsCompatible)
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
// Release the temporary only if it was allocated above (not the static buffer).
if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
}
}
};
// Matrix * vector with the vector on the right and a row-major matrix, for the
// case where the third selector argument (BlasCompatible) is true.
//
// run() performs dest += alpha * lhs * rhs by calling general_matrix_vector_product.
// The kernel is always given a rhs pointer with unit stride, so when the rhs does not
// have a compile-time inner stride of 1 it is first copied into a temporary buffer.
template<> struct gemv_selector<OnTheRight,RowMajor,true>
{
  template<typename ProductType, typename Dest>
  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename ProductType::LhsScalar LhsScalar;
    typedef typename ProductType::RhsScalar RhsScalar;
    typedef typename ProductType::Scalar ResScalar;
    typedef typename ProductType::Index Index;
    typedef typename ProductType::ActualLhsType ActualLhsType;
    typedef typename ProductType::ActualRhsType ActualRhsType;
    typedef typename ProductType::_ActualRhsType _ActualRhsType;
    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;

    // Operands as returned by the blas traits' extract().
    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());

    // Fold the scalar factors reported by the blas traits for both operands into alpha.
    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
    };

    // Static storage for the packed rhs, only present when a copy may be needed.
    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;

    RhsScalar* actualRhsPtr;
    bool freeRhsPtr = false;
    if (DirectlyUseRhs)
    {
      actualRhsPtr = const_cast<RhsScalar*>(&actualRhs.coeffRef(0));
    }
    else
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
      // Prefer the static buffer; a null pointer means none is available and one must be allocated.
      if((actualRhsPtr = static_rhs.data())==0)
      {
        freeRhsPtr = true;
        actualRhsPtr = ei_aligned_stack_new(RhsScalar, actualRhs.size());
      }
      // Pack the rhs contiguously into the temporary.
      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }

    general_matrix_vector_product
      <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
        &actualLhs.coeffRef(0,0), actualLhs.outerStride(),
        actualRhsPtr, 1,
        &dest.coeffRef(0,0), dest.innerStride(),
        actualAlpha);

    // Release the temporary only if it was allocated above. The size passed here is
    // the very same expression that was used for the allocation (actualRhs.size()),
    // so that allocation and deallocation can never disagree on the buffer size.
    if((!DirectlyUseRhs) && freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, actualRhs.size());
  }
};
// Fallback matrix * vector for a column-major matrix when the fast kernel is not
// selected: dest accumulates one scaled matrix column per rhs coefficient.
template<>
struct gemv_selector<OnTheRight,ColMajor,false>
{
  template<typename ProductType, typename Dest>
  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename Dest::Index Index;
    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
    const Index depth = prod.rhs().rows();
    for (Index col = 0; col < depth; ++col)
    {
      dest += (alpha*prod.rhs().coeff(col)) * prod.lhs().col(col);
    }
  }
};
// Fallback matrix * vector for a row-major matrix when the fast kernel is not
// selected: each destination coefficient accumulates alpha times the sum of the
// coefficient-wise product of one matrix row with the (transposed) rhs.
template<>
struct gemv_selector<OnTheRight,RowMajor,false>
{
  template<typename ProductType, typename Dest>
  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
    typedef typename Dest::Index Index;
    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
    const Index nbRows = prod.rows();
    for (Index row = 0; row < nbRows; ++row)
    {
      dest.coeffRef(row) += alpha * (prod.lhs().row(row).cwiseProduct(prod.rhs().transpose())).sum();
    }
  }
};
} // end namespace internal
/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/
/** \returns the matrix product of \c *this and \a other.
  *
  * \note If instead of the matrix product you want the coefficient-wise product, see cwiseProduct().
  *
  * Invalid combinations of compile-time sizes are rejected with a static assertion
  * whose message hints at the operation that was probably intended.
  *
  * \sa lazyProduct(), operator*=(const MatrixBase&), cwiseProduct()
  */
template<typename Derived>
template<typename OtherDerived>
inline const typename ProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
  // A note regarding the function declaration: In MSVC, this function will sometimes
  // not be inlined since DenseStorage is an unwindable object for dynamic
  // matrices and product types are holding a member to store the result.
  // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
  typedef typename ProductReturnType<Derived,OtherDerived>::Type ResultType;

  enum {
    // The inner dimensions agree, or at least one of them is only known at run time.
    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
                   || OtherDerived::RowsAtCompileTime==Dynamic
                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
  };

  // note to the lost user:
  //    * for a dot product use: v1.dot(v2)
  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)

#ifdef EIGEN_DEBUG_PRODUCT
  internal::product_type<Derived,OtherDerived>::debug();
#endif

  return ResultType(derived(), other.derived());
}
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
  *
  * The returned product behaves like any other expression: its coefficients are
  * computed one at a time, as they are requested. This might be useful in some
  * extremely rare cases when only a small and non-coherent fraction of the result's
  * coefficients has to be computed.
  *
  * \warning This version of the matrix product can be much much slower. So use it only if you know
  * what you are doing and that you measured a true speed improvement.
  *
  * \sa operator*(const MatrixBase&)
  */
template<typename Derived>
template<typename OtherDerived>
const typename LazyProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
  typedef typename LazyProductReturnType<Derived,OtherDerived>::Type ResultType;

  enum {
    // The inner dimensions agree, or at least one of them is only known at run time.
    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
                   || OtherDerived::RowsAtCompileTime==Dynamic
                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
  };

  // note to the lost user:
  //    * for a dot product use: v1.dot(v2)
  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)

  return ResultType(derived(), other.derived());
}
#endif // EIGEN_PRODUCT_H

View File

@@ -115,10 +115,10 @@ class ProductBase : public MatrixBase<Derived>
inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); }
template<typename Dest>
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); }
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,1); }
template<typename Dest>
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,-1); }
template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
@@ -179,8 +179,8 @@ class ProductBase : public MatrixBase<Derived>
protected:
LhsNested m_lhs;
RhsNested m_rhs;
const LhsNested m_lhs;
const RhsNested m_rhs;
mutable PlainObject m_result;
};
@@ -256,16 +256,16 @@ class ScaledProduct
: Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
template<typename Dest>
inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); }
inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,m_alpha); }
template<typename Dest>
inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); }
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,m_alpha); }
template<typename Dest>
inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,-m_alpha); }
template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha * m_alpha); }
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha); }
const Scalar& alpha() const { return m_alpha; }

View File

@@ -95,7 +95,7 @@ struct redux_novec_unroller
typedef typename Derived::Scalar Scalar;
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func)
{
return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
@@ -112,7 +112,7 @@ struct redux_novec_unroller<Func, Derived, Start, 1>
typedef typename Derived::Scalar Scalar;
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&)
{
return mat.coeffByOuterInner(outer, inner);
}
@@ -125,7 +125,7 @@ template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
typedef typename Derived::Scalar Scalar;
static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
EIGEN_STRONG_INLINE static Scalar run(const Derived&, const Func&) { return Scalar(); }
};
/*** vectorization ***/
@@ -141,7 +141,7 @@ struct redux_vec_unroller
typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func)
{
return func.packetOp(
redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
@@ -162,7 +162,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
{
return mat.template packetByOuterInner<alignment>(outer, inner);
}
@@ -214,33 +214,20 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
const Index size = mat.size();
eigen_assert(size && "you are using an empty matrix");
const Index packetSize = packet_traits<Scalar>::size;
const Index alignedStart = internal::first_aligned(mat);
const Index alignedStart = first_aligned(mat);
enum {
alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
const Index alignedEnd2 = alignedStart + alignedSize2;
const Index alignedEnd = alignedStart + alignedSize;
const Index alignedSize = ((size-alignedStart)/packetSize)*packetSize;
const Index alignedEnd = alignedStart + alignedSize;
Scalar res;
if(alignedSize)
{
PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
{
PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
{
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
}
packet_res0 = func.packetOp(packet_res0,packet_res1);
if(alignedEnd>alignedEnd2)
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
}
res = func.predux(packet_res0);
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
for(Index index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
packet_res = func.packetOp(packet_res, mat.template packet<alignment>(index));
res = func.predux(packet_res);
for(Index index = 0; index < alignedStart; ++index)
res = func(res,mat.coeff(index));
@@ -309,7 +296,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
Size = Derived::SizeAtCompileTime,
VectorizedSize = (Size / PacketSize) * PacketSize
};
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
{
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));

View File

@@ -122,13 +122,9 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
return m_matrix.template packet<LoadMode>(actual_row, actual_col);
}
const typename internal::remove_all<typename MatrixType::Nested>::type& nestedExpression() const
{
return m_matrix;
}
protected:
typename MatrixType::Nested m_matrix;
const typename MatrixType::Nested m_matrix;
const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
};

View File

@@ -183,14 +183,8 @@ template<typename MatrixType, int Direction> class Reverse
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
}
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
return m_matrix;
}
protected:
typename MatrixType::Nested m_matrix;
const typename MatrixType::Nested m_matrix;
};
/** \returns an expression of the reverse of *this.

View File

@@ -101,25 +101,10 @@ class Select : internal::no_assignment_operator,
return m_else.coeff(i);
}
const ConditionMatrixType& conditionMatrix() const
{
return m_condition;
}
const ThenMatrixType& thenMatrix() const
{
return m_then;
}
const ElseMatrixType& elseMatrix() const
{
return m_else;
}
protected:
typename ConditionMatrixType::Nested m_condition;
typename ThenMatrixType::Nested m_then;
typename ElseMatrixType::Nested m_else;
const typename ConditionMatrixType::Nested m_condition;
const typename ThenMatrixType::Nested m_then;
const typename ElseMatrixType::Nested m_else;
};

View File

@@ -32,13 +32,13 @@
* \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
*
* \param MatrixType the type of the dense matrix storing the coefficients
* \param TriangularPart can be either \c #Lower or \c #Upper
* \param TriangularPart can be either \c Lower or \c Upper
*
* This class is an expression of a selfadjoint matrix from a triangular part of a matrix
* with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()
* and most of the time this is the only way that it is used.
*
* \sa class TriangularBase, MatrixBase::selfadjointView()
* \sa class TriangularBase, MatrixBase::selfAdjointView()
*/
namespace internal {
@@ -82,7 +82,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
};
typedef typename MatrixType::PlainObject PlainObject;
inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
{}
inline Index rows() const { return m_matrix.rows(); }
@@ -199,7 +199,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
#endif
protected:
MatrixTypeNested m_matrix;
const MatrixTypeNested m_matrix;
};
@@ -222,7 +222,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -236,7 +236,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
{
static inline void run(Derived1 &, const Derived2 &) {}
inline static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
@@ -247,7 +247,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -261,14 +261,14 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
{
static inline void run(Derived1 &, const Derived2 &) {}
inline static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
@@ -285,7 +285,7 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dyn
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
{
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
typedef typename Derived1::Index Index;
for(Index i = 0; i < dst.rows(); ++i)

View File

@@ -163,16 +163,6 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
return Base::operator=(rhs);
}
Lhs& expression() const
{
return m_matrix;
}
const BinaryOp& functor() const
{
return m_functor;
}
protected:
Lhs& m_matrix;
const BinaryOp& m_functor;

View File

@@ -74,19 +74,26 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
// FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1
bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
(useRhsDirectly ? rhs.data() : 0));
if(!useRhsDirectly)
RhsScalar* actualRhs;
if(useRhsDirectly)
{
actualRhs = &rhs.coeffRef(0);
}
else
{
actualRhs = ei_aligned_stack_new(RhsScalar,rhs.size());
MappedRhs(actualRhs,rhs.size()) = rhs;
}
triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate,
(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);
if(!useRhsDirectly)
{
rhs = MappedRhs(actualRhs, rhs.size());
ei_aligned_stack_delete(RhsScalar, actualRhs, rhs.size());
}
}
};
@@ -100,7 +107,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
static void run(const Lhs& lhs, Rhs& rhs)
{
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
const ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
@@ -177,8 +184,10 @@ template<int Side, typename OtherDerived>
void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
{
OtherDerived& other = _other.const_cast_derived();
eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) );
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
eigen_assert(cols() == rows());
eigen_assert( (Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols()) );
eigen_assert(!(Mode & ZeroDiag));
eigen_assert(Mode & (Upper|Lower));
enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime };
typedef typename internal::conditional<copy,
@@ -253,7 +262,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
protected:
const TriangularType& m_triangularMatrix;
typename Rhs::Nested m_rhs;
const typename Rhs::Nested m_rhs;
};
} // namespace internal

View File

@@ -56,11 +56,10 @@ template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const
{
using std::min;
const Index blockSize = 4096;
RealScalar scale(0);
RealScalar invScale(1);
RealScalar ssq(0); // sum of square
RealScalar scale = 0;
RealScalar invScale = 1;
RealScalar ssq = 0; // sum of square
enum {
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
};
@@ -69,7 +68,7 @@ MatrixBase<Derived>::stableNorm() const
if (bi>0)
internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
for (; bi<n; bi+=blockSize)
internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
internal::stable_norm_kernel(this->segment(bi,std::min(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
return scale * internal::sqrt(ssq);
}
@@ -86,9 +85,6 @@ template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::blueNorm() const
{
using std::pow;
using std::min;
using std::max;
static Index nmax = -1;
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
if(nmax <= 0)
@@ -103,25 +99,25 @@ MatrixBase<Derived>::blueNorm() const
// For portability, the PORT subprograms "i1mach" and "r1mach"
// are used. For any specific computer, each of the assignment
// statements can be replaced
nbig = (std::numeric_limits<Index>::max)(); // largest integer
nbig = std::numeric_limits<Index>::max(); // largest integer
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
rbig = std::numeric_limits<RealScalar>::max(); // largest floating-point number
iexp = -((1-iemin)/2);
b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
b1 = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
iexp = (iemax + 1 - it)/2;
b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
b2 = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
iexp = (2-iemin)/2;
s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
s1m = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
iexp = - ((iemax+it)/2);
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
s2m = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
overfl = rbig*s2m; // overflow boundary for abig
eps = RealScalar(pow(double(ibeta), 1-it));
eps = RealScalar(std::pow(double(ibeta), 1-it));
relerr = internal::sqrt(eps); // tolerance for neglecting asml
abig = RealScalar(1.0/eps - 1.0);
if (RealScalar(nbig)>abig) nmax = int(abig); // largest safe n
@@ -167,8 +163,8 @@ MatrixBase<Derived>::blueNorm() const
}
else
return internal::sqrt(amed);
asml = (min)(abig, amed);
abig = (max)(abig, amed);
asml = std::min(abig, amed);
abig = std::max(abig, amed);
if(asml <= abig*relerr)
return abig;
else

View File

@@ -52,15 +52,6 @@ template<typename ExpressionType> class SwapWrapper
inline Index cols() const { return m_expression.cols(); }
inline Index outerStride() const { return m_expression.outerStride(); }
inline Index innerStride() const { return m_expression.innerStride(); }
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
inline const Scalar* data() const { return m_expression.data(); }
inline Scalar& coeffRef(Index row, Index col)
{
@@ -128,8 +119,6 @@ template<typename ExpressionType> class SwapWrapper
_other.template writePacket<LoadMode>(index, tmp);
}
ExpressionType& expression() const { return m_expression; }
protected:
ExpressionType& m_expression;
};

View File

@@ -91,7 +91,7 @@ template<typename MatrixType> class Transpose
nestedExpression() { return m_matrix.const_cast_derived(); }
protected:
typename MatrixType::Nested m_matrix;
const typename MatrixType::Nested m_matrix;
};
namespace internal {
@@ -152,12 +152,12 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
return derived().nestedExpression().coeffRef(index);
}
inline CoeffReturnType coeff(Index row, Index col) const
inline const CoeffReturnType coeff(Index row, Index col) const
{
return derived().nestedExpression().coeff(col, row);
}
inline CoeffReturnType coeff(Index index) const
inline const CoeffReturnType coeff(Index index) const
{
return derived().nestedExpression().coeff(index);
}
@@ -350,14 +350,15 @@ struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
template<bool DestIsTransposed, typename OtherDerived>
struct check_transpose_aliasing_compile_time_selector
{
enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
enum { ret = blas_traits<OtherDerived>::IsTransposed != DestIsTransposed
};
};
template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
{
enum { ret = bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
|| bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
enum { ret = blas_traits<DerivedA>::IsTransposed != DestIsTransposed
|| blas_traits<DerivedB>::IsTransposed != DestIsTransposed
};
};
@@ -366,7 +367,7 @@ struct check_transpose_aliasing_run_time_selector
{
static bool run(const Scalar* dest, const OtherDerived& src)
{
return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src));
return (blas_traits<OtherDerived>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src));
}
};

View File

@@ -404,7 +404,7 @@ struct transposition_matrix_product_retval
protected:
const TranspositionType& m_transpositions;
typename MatrixType::Nested m_matrix;
const typename MatrixType::Nested m_matrix;
};
} // end namespace internal

View File

@@ -111,7 +111,6 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
EIGEN_ONLY_USED_FOR_DEBUG(col);
eigen_assert(col>=0 && col<cols() && row>=0 && row<rows());
const int mode = int(Mode) & ~SelfAdjoint;
EIGEN_ONLY_USED_FOR_DEBUG(mode);
eigen_assert((mode==Upper && col>=row)
|| (mode==Lower && col<=row)
|| ((mode==StrictlyUpper || mode==UnitUpper) && col>row)
@@ -135,13 +134,13 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
* \brief Base class for triangular part in a matrix
*
* \param MatrixType the type of the object in which we are taking the triangular part
* \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
* #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
* This is in fact a bit field; it must have either #Upper or #Lower,
* and additionally it may have #UnitDiag or #ZeroDiag or neither.
* \param Mode the kind of triangular matrix expression to construct. Can be Upper,
* Lower, UpperSelfadjoint, or LowerSelfadjoint. This is in fact a bit field;
* it must have either Upper or Lower, and additionally it may have either
* UnitDiag or Selfadjoint.
*
* This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
* matrices one should speak of "trapezoid" parts. This class is the return type
* matrices one should speak ok "trapezoid" parts. This class is the return type
* of MatrixBase::triangularView() and most of the time this is the only way it is used.
*
* \sa MatrixBase::triangularView()
@@ -273,8 +272,11 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
{ return m_matrix.conjugate(); }
/** \sa MatrixBase::adjoint() */
inline TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint()
{ return m_matrix.adjoint(); }
/** \sa MatrixBase::adjoint() const */
inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
inline const TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
{ return m_matrix.adjoint(); }
/** \sa MatrixBase::transpose() */
@@ -285,13 +287,11 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
}
/** \sa MatrixBase::transpose() const */
inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
{
return m_matrix.transpose();
}
{ return m_matrix.transpose(); }
/** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived>
TriangularProduct<Mode,true,MatrixType,false,OtherDerived, OtherDerived::IsVectorAtCompileTime>
TriangularProduct<Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime>
operator*(const MatrixBase<OtherDerived>& rhs) const
{
return TriangularProduct
@@ -374,8 +374,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
template<typename OtherDerived>
void swap(MatrixBase<OtherDerived> const & other)
{
SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
}
Scalar determinant() const
@@ -433,7 +432,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
MatrixTypeNested m_matrix;
const MatrixTypeNested m_matrix;
};
/***************************************************************************
@@ -449,10 +448,8 @@ struct triangular_assignment_selector
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
typedef typename Derived1::Scalar Scalar;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -469,9 +466,9 @@ struct triangular_assignment_selector
else if(ClearOpposite)
{
if (Mode&UnitDiag && row==col)
dst.coeffRef(row, col) = Scalar(1);
dst.coeffRef(row, col) = 1;
else
dst.coeffRef(row, col) = Scalar(0);
dst.coeffRef(row, col) = 0;
}
}
};
@@ -480,24 +477,23 @@ struct triangular_assignment_selector
template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
{
static inline void run(Derived1 &, const Derived2 &) {}
inline static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
typedef typename Derived1::Scalar Scalar;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = (std::min)(j, dst.rows()-1);
Index maxi = std::min(j, dst.rows()-1);
for(Index i = 0; i <= maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
for(Index i = maxi+1; i < dst.rows(); ++i)
dst.coeffRef(i, j) = Scalar(0);
dst.coeffRef(i, j) = 0;
}
}
};
@@ -506,16 +502,16 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
for(Index i = j; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
Index maxi = (std::min)(j, dst.rows());
Index maxi = std::min(j, dst.rows());
if (ClearOpposite)
for(Index i = 0; i < maxi; ++i)
dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
dst.coeffRef(i, j) = 0;
}
}
};
@@ -524,11 +520,11 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = (std::min)(j, dst.rows());
Index maxi = std::min(j, dst.rows());
for(Index i = 0; i < maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
@@ -542,16 +538,16 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
for(Index i = j+1; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
Index maxi = (std::min)(j, dst.rows()-1);
Index maxi = std::min(j, dst.rows()-1);
if (ClearOpposite)
for(Index i = 0; i <= maxi; ++i)
dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
dst.coeffRef(i, j) = 0;
}
}
};
@@ -560,11 +556,11 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = (std::min)(j, dst.rows());
Index maxi = std::min(j, dst.rows());
for(Index i = 0; i < maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
@@ -580,11 +576,11 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = (std::min)(j, dst.rows());
Index maxi = std::min(j, dst.rows());
for(Index i = maxi+1; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
@@ -760,8 +756,8 @@ typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Deriv
/**
* \returns an expression of a triangular view extracted from the current matrix
*
* The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
* The parameter \a Mode can have the following values: \c Upper, \c StrictlyUpper, \c UnitUpper,
* \c Lower, \c StrictlyLower, \c UnitLower.
*
* Example: \include MatrixBase_extract.cpp
* Output: \verbinclude MatrixBase_extract.out
@@ -796,7 +792,7 @@ bool MatrixBase<Derived>::isUpperTriangular(RealScalar prec) const
RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
{
Index maxi = (std::min)(j, rows()-1);
Index maxi = std::min(j, rows()-1);
for(Index i = 0; i <= maxi; ++i)
{
RealScalar absValue = internal::abs(coeff(i,j));
@@ -828,7 +824,7 @@ bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const
RealScalar threshold = maxAbsOnLowerPart * prec;
for(Index j = 1; j < cols(); ++j)
{
Index maxi = (std::min)(j, rows()-1);
Index maxi = std::min(j, rows()-1);
for(Index i = 0; i < maxi; ++i)
if(internal::abs(coeff(i, j)) > threshold) return false;
}

View File

@@ -31,9 +31,9 @@
*
* \brief Generic expression of a partially reduxed matrix
*
* \tparam MatrixType the type of the matrix we are applying the redux operation
* \tparam MemberOp type of the member functor
* \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
* \param MatrixType the type of the matrix we are applying the redux operation
* \param MemberOp type of the member functor
* \param Direction indicates the direction of the redux (Vertical or Horizontal)
*
* This class represents an expression of a partial redux operator of a matrix.
* It is the return type of some VectorwiseOp functions,
@@ -110,7 +110,7 @@ class PartialReduxExpr : internal::no_assignment_operator,
}
protected:
MatrixTypeNested m_matrix;
const MatrixTypeNested m_matrix;
const MemberOp m_functor;
};
@@ -164,7 +164,7 @@ struct member_redux {
* \brief Pseudo expression providing partial reduction operations
*
* \param ExpressionType the type of the object on which to do partial reductions
* \param Direction indicates the direction of the redux (#Vertical or #Horizontal)
* \param Direction indicates the direction of the redux (Vertical or Horizontal)
*
* This class represents a pseudo expression with partial reduction features.
* It is the return type of DenseBase::colwise() and DenseBase::rowwise()
@@ -237,10 +237,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
typename ExtendedType<OtherDerived>::Type
extendedTo(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1),
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1),
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
return typename ExtendedType<OtherDerived>::Type
(other.derived(),
Direction==Vertical ? 1 : m_matrix.rows(),
@@ -421,9 +418,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
//eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
for(Index j=0; j<subVectors(); ++j)
subVector(j) = other;
return const_cast<ExpressionType&>(m_matrix);
}
/** Adds the vector \a other to each subvector of \c *this */
@@ -431,8 +429,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
for(Index j=0; j<subVectors(); ++j)
subVector(j) += other.derived();
return const_cast<ExpressionType&>(m_matrix);
}
/** Subtracts the vector \a other from each subvector of \c *this */
@@ -440,29 +439,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
}
/** Multiplies each subvector of \c *this by the vector \a other */
template<typename OtherDerived>
ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
m_matrix *= extendedTo(other.derived());
return const_cast<ExpressionType&>(m_matrix);
}
/** Divides each subvector of \c *this by the vector \a other */
template<typename OtherDerived>
ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
m_matrix /= extendedTo(other.derived());
for(Index j=0; j<subVectors(); ++j)
subVector(j) -= other.derived();
return const_cast<ExpressionType&>(m_matrix);
}
@@ -473,8 +451,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
const typename ExtendedType<OtherDerived>::Type>
operator+(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
return m_matrix + extendedTo(other.derived());
}
@@ -485,39 +462,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
const typename ExtendedType<OtherDerived>::Type>
operator-(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
return m_matrix - extendedTo(other.derived());
}
/** Returns the expression where each subvector is the product of the vector \a other
* by the corresponding subvector of \c *this */
template<typename OtherDerived> EIGEN_STRONG_INLINE
CwiseBinaryOp<internal::scalar_product_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
operator*(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix * extendedTo(other.derived());
}
/** Returns the expression where each subvector is the quotient of the corresponding
* subvector of \c *this by the vector \a other */
template<typename OtherDerived>
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
operator/(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix / extendedTo(other.derived());
}
/////////// Geometry module ///////////
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
@@ -561,7 +509,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Example: \include MatrixBase_colwise.cpp
* Output: \verbinclude MatrixBase_colwise.out
*
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
* \sa rowwise(), class VectorwiseOp
*/
template<typename Derived>
inline const typename DenseBase<Derived>::ConstColwiseReturnType
@@ -572,7 +520,7 @@ DenseBase<Derived>::colwise() const
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
*
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
* \sa rowwise(), class VectorwiseOp
*/
template<typename Derived>
inline typename DenseBase<Derived>::ColwiseReturnType
@@ -586,7 +534,7 @@ DenseBase<Derived>::colwise()
* Example: \include MatrixBase_rowwise.cpp
* Output: \verbinclude MatrixBase_rowwise.out
*
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
* \sa colwise(), class VectorwiseOp
*/
template<typename Derived>
inline const typename DenseBase<Derived>::ConstRowwiseReturnType
@@ -597,7 +545,7 @@ DenseBase<Derived>::rowwise() const
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
*
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
* \sa colwise(), class VectorwiseOp
*/
template<typename Derived>
inline typename DenseBase<Derived>::RowwiseReturnType

View File

@@ -35,7 +35,7 @@ struct visitor_impl
row = (UnrollCount-1) % Derived::RowsAtCompileTime
};
static inline void run(const Derived &mat, Visitor& visitor)
inline static void run(const Derived &mat, Visitor& visitor)
{
visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
visitor(mat.coeff(row, col), row, col);
@@ -45,7 +45,7 @@ struct visitor_impl
template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, 1>
{
static inline void run(const Derived &mat, Visitor& visitor)
inline static void run(const Derived &mat, Visitor& visitor)
{
return visitor.init(mat.coeff(0, 0), 0, 0);
}
@@ -55,7 +55,7 @@ template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, Dynamic>
{
typedef typename Derived::Index Index;
static inline void run(const Derived& mat, Visitor& visitor)
inline static void run(const Derived& mat, Visitor& visitor)
{
visitor.init(mat.coeff(0,0), 0, 0);
for(Index i = 1; i < mat.rows(); ++i)

View File

@@ -168,7 +168,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
{
if (Offset==1)
{

View File

@@ -487,7 +487,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
if (Offset!=0)
first = vec_sld(first, second, Offset*4);
@@ -497,7 +497,7 @@ struct palign_impl<Offset,Packet4f>
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
if (Offset!=0)
first = vec_sld(first, second, Offset*4);

View File

@@ -27,8 +27,8 @@
namespace internal {
static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };
//---------- float ----------
struct Packet2cf
@@ -43,7 +43,6 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
typedef Packet2cf type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
HasAdd = 1,

View File

@@ -52,16 +52,6 @@ typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
#if defined(__llvm__) && !defined(__clang__)
// Special treatment for Apple's llvm-gcc, whose NEON packet types are unions
#define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
#else
//Default initializer for packets
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
#endif
#ifndef __pld
#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
#endif
@@ -94,7 +84,7 @@ template<> struct packet_traits<int> : default_packet_traits
};
};
#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
#if EIGEN_GNUC_AT_MOST(4,4)
// workaround gcc 4.2, 4.3 and 4.4 compilation issue
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
@@ -110,12 +100,12 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
{
Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
Packet4f countdown = { 3, 2, 1, 0 };
return vaddq_f32(pset1<Packet4f>(a), countdown);
}
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
{
Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
Packet4i countdown = { 3, 2, 1, 0 };
return vaddq_s32(pset1<Packet4i>(a), countdown);
}
@@ -201,14 +191,14 @@ template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
float32x2_t lo, hi;
lo = vdup_n_f32(*from);
hi = vdup_n_f32(*(from+1));
hi = vdup_n_f32(*from);
return vcombine_f32(lo, hi);
}
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
int32x2_t lo, hi;
lo = vdup_n_s32(*from);
hi = vdup_n_s32(*(from+1));
hi = vdup_n_s32(*from);
return vcombine_s32(lo, hi);
}
@@ -405,29 +395,25 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
return s[0];
}
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
#define PALIGN_NEON(Offset,Type,Command) \
template<>\
struct palign_impl<Offset,Type>\
{\
EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
{\
if (Offset!=0)\
first = Command(first, second, Offset);\
}\
};\
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
if (Offset!=0)
first = vextq_f32(first, second, Offset);
}
};
PALIGN_NEON(0,Packet4f,vextq_f32)
PALIGN_NEON(1,Packet4f,vextq_f32)
PALIGN_NEON(2,Packet4f,vextq_f32)
PALIGN_NEON(3,Packet4f,vextq_f32)
PALIGN_NEON(0,Packet4i,vextq_s32)
PALIGN_NEON(1,Packet4i,vextq_s32)
PALIGN_NEON(2,Packet4i,vextq_s32)
PALIGN_NEON(3,Packet4i,vextq_s32)
#undef PALIGN_NEON
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
if (Offset!=0)
first = vextq_s32(first, second, Offset);
}
};
} // end namespace internal

View File

@@ -102,7 +102,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
Packet2cf res;
#if EIGEN_GNUC_AT_MOST(4,2)
// workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)&from);
#else
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
#endif
@@ -151,7 +151,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
{
if (Offset==1)
{
@@ -350,7 +350,7 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
template<int Offset>
struct palign_impl<Offset,Packet1cd>
{
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
EIGEN_STRONG_INLINE static void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
{
// FIXME are we sure we never have to align a Packet1cd?
// Even though a std::complex<double> occupies 16 bytes, it is not necessarily aligned on a 16-byte boundary...

View File

@@ -110,18 +110,9 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
#if defined(_MSC_VER) && (_MSC_VER==1500)
// Workaround MSVC 9 internal compiler error.
// TODO: This was detected with win64 (amd64) builds; check whether it also happens in 32-bit+SSE mode.
// TODO: Check whether a better fix exists, such as adding a pset0() function (the crash occurred on pset1(0)).
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
@@ -291,7 +282,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
{ return pset1<Packet2d>(from[0]); }
@@ -311,8 +302,8 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
_mm_storel_pd((to), from);
_mm_storeh_pd((to+1), from);
}
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castsi128_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castsi128_pd(from)); }
// some compilers might be tempted to perform multiple moves instead of using a vector path.
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
@@ -550,7 +541,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
if (Offset!=0)
first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
@@ -560,7 +551,7 @@ struct palign_impl<Offset,Packet4f>
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
if (Offset!=0)
first = _mm_alignr_epi8(second,first, Offset*4);
@@ -570,7 +561,7 @@ struct palign_impl<Offset,Packet4i>
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
{
if (Offset==1)
first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
@@ -581,7 +572,7 @@ struct palign_impl<Offset,Packet2d>
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
if (Offset==1)
{
@@ -604,7 +595,7 @@ struct palign_impl<Offset,Packet4f>
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
if (Offset==1)
{
@@ -627,7 +618,7 @@ struct palign_impl<Offset,Packet4i>
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
{
if (Offset==1)
{

View File

@@ -224,8 +224,8 @@ class CoeffBasedProduct
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
protected:
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
const LhsNested m_lhs;
const RhsNested m_rhs;
mutable PlainObject m_result;
};
@@ -252,7 +252,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
@@ -263,7 +263,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
@@ -273,7 +273,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
{
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
@@ -291,7 +291,7 @@ struct product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
@@ -302,7 +302,7 @@ template<typename Lhs, typename Rhs, typename Packet>
struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
}
@@ -314,7 +314,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
Packet pres;
product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
@@ -327,7 +327,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R
struct product_coeff_vectorized_dyn_selector
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -339,7 +339,7 @@ template<typename Lhs, typename Rhs, int RhsCols>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -349,7 +349,7 @@ template<typename Lhs, typename Rhs, int LhsRows>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
}
@@ -359,7 +359,7 @@ template<typename Lhs, typename Rhs>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs).sum();
}
@@ -369,7 +369,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
}
@@ -383,7 +383,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
@@ -394,7 +394,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
@@ -405,7 +405,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
@@ -415,7 +415,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
@@ -425,7 +425,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
@@ -438,7 +438,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));

View File

@@ -30,18 +30,19 @@ namespace internal {
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
class gebp_traits;
/** \internal \returns b if a<=0, and returns a otherwise. */
inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
{
return a<=0 ? b : a;
}
/** \internal */
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
{
static std::ptrdiff_t m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
static std::ptrdiff_t m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
static std::ptrdiff_t m_l1CacheSize = 0;
static std::ptrdiff_t m_l2CacheSize = 0;
if(m_l1CacheSize==0)
{
m_l1CacheSize = queryL1CacheSize();
m_l2CacheSize = queryTopLevelCacheSize();
if(m_l1CacheSize<=0) m_l1CacheSize = 8 * 1024;
if(m_l2CacheSize<=0) m_l2CacheSize = 1 * 1024 * 1024;
}
if(action==SetAction)
{
@@ -80,7 +81,6 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdi
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
{
EIGEN_UNUSED_VARIABLE(n);
// Explanations:
// Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
// mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
@@ -102,6 +102,7 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
k = std::min<std::ptrdiff_t>(k, l1/kdiv);
std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
if(_m<m) m = _m & mr_mask;
n = n;
}
template<typename LhsScalar, typename RhsScalar>
@@ -117,14 +118,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
// FIXME (a bit overkill maybe ?)
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
{
c = cj.pmadd(a,b,c);
}
};
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
{
t = b; t = cj.pmul(a,t); c = padd(c,t);
}
@@ -535,7 +536,7 @@ struct gebp_kernel
ResPacketSize = Traits::ResPacketSize
};
EIGEN_DONT_INLINE EIGEN_FLATTEN_ATTRIB
EIGEN_FLATTEN_ATTRIB
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
{
@@ -597,64 +598,64 @@ struct gebp_kernel
if(nr==2)
{
LhsPacket A0, A1;
RhsPacket B_0;
RhsPacket B0;
RhsPacket T0;
EIGEN_ASM_COMMENT("mybegin2");
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[1*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[1*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadLhs(&blA[2*LhsProgress], A0);
traits.loadLhs(&blA[3*LhsProgress], A1);
traits.loadRhs(&blB[2*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[3*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadRhs(&blB[2*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[3*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadLhs(&blA[4*LhsProgress], A0);
traits.loadLhs(&blA[5*LhsProgress], A1);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[5*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[5*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
traits.loadLhs(&blA[6*LhsProgress], A0);
traits.loadLhs(&blA[7*LhsProgress], A1);
traits.loadRhs(&blB[6*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[7*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadRhs(&blB[6*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[7*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
EIGEN_ASM_COMMENT("myend");
}
else
{
EIGEN_ASM_COMMENT("mybegin4");
LhsPacket A0, A1;
RhsPacket B_0, B1, B2, B3;
RhsPacket B0, B1, B2, B3;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B_0,C0,T0);
traits.madd(A0,B0,C0,T0);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.madd(A1,B_0,C4,B_0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.loadRhs(&blB[5*RhsProgress], B1);
@@ -666,9 +667,9 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.madd(A1,B3,C7,B3);
traits.loadLhs(&blA[3*LhsProgress], A1);
traits.loadRhs(&blB[7*RhsProgress], B3);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[8*RhsProgress], B_0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[8*RhsProgress], B0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.loadRhs(&blB[9*RhsProgress], B1);
@@ -681,9 +682,9 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.loadLhs(&blA[5*LhsProgress], A1);
traits.loadRhs(&blB[11*RhsProgress], B3);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[12*RhsProgress], B_0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[12*RhsProgress], B0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.loadRhs(&blB[13*RhsProgress], B1);
@@ -695,8 +696,8 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.madd(A1,B3,C7,B3);
traits.loadLhs(&blA[7*LhsProgress], A1);
traits.loadRhs(&blB[15*RhsProgress], B3);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
traits.madd(A0,B2,C2,T0);
@@ -714,32 +715,32 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsPacket A0, A1;
RhsPacket B_0;
RhsPacket B0;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[1*RhsProgress], B_0);
traits.madd(A0,B_0,C1,T0);
traits.madd(A1,B_0,C5,B_0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[1*RhsProgress], B0);
traits.madd(A0,B0,C1,T0);
traits.madd(A1,B0,C5,B0);
}
else
{
LhsPacket A0, A1;
RhsPacket B_0, B1, B2, B3;
RhsPacket B0, B1, B2, B3;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B_0,C0,T0);
traits.madd(A0,B0,C0,T0);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.madd(A1,B_0,C4,B_0);
traits.madd(A1,B0,C4,B0);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.madd(A0,B1,C1,T0);
traits.madd(A1,B1,C5,B1);
@@ -826,42 +827,42 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsPacket A0;
RhsPacket B_0, B1;
RhsPacket B0, B1;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[2*RhsProgress], B_0);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[2*RhsProgress], B0);
traits.madd(A0,B1,C1,B1);
traits.loadLhs(&blA[1*LhsProgress], A0);
traits.loadRhs(&blB[3*RhsProgress], B1);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.madd(A0,B1,C1,B1);
traits.loadLhs(&blA[2*LhsProgress], A0);
traits.loadRhs(&blB[5*RhsProgress], B1);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[6*RhsProgress], B_0);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[6*RhsProgress], B0);
traits.madd(A0,B1,C1,B1);
traits.loadLhs(&blA[3*LhsProgress], A0);
traits.loadRhs(&blB[7*RhsProgress], B1);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B1,C1,B1);
}
else
{
LhsPacket A0;
RhsPacket B_0, B1, B2, B3;
RhsPacket B0, B1, B2, B3;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.loadRhs(&blB[4*RhsProgress], B_0);
traits.loadRhs(&blB[4*RhsProgress], B0);
traits.madd(A0,B1,C1,B1);
traits.loadRhs(&blB[5*RhsProgress], B1);
traits.madd(A0,B2,C2,B2);
@@ -869,8 +870,8 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.madd(A0,B3,C3,B3);
traits.loadLhs(&blA[1*LhsProgress], A0);
traits.loadRhs(&blB[7*RhsProgress], B3);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[8*RhsProgress], B_0);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[8*RhsProgress], B0);
traits.madd(A0,B1,C1,B1);
traits.loadRhs(&blB[9*RhsProgress], B1);
traits.madd(A0,B2,C2,B2);
@@ -879,8 +880,8 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.loadLhs(&blA[2*LhsProgress], A0);
traits.loadRhs(&blB[11*RhsProgress], B3);
traits.madd(A0,B_0,C0,B_0);
traits.loadRhs(&blB[12*RhsProgress], B_0);
traits.madd(A0,B0,C0,B0);
traits.loadRhs(&blB[12*RhsProgress], B0);
traits.madd(A0,B1,C1,B1);
traits.loadRhs(&blB[13*RhsProgress], B1);
traits.madd(A0,B2,C2,B2);
@@ -889,7 +890,7 @@ EIGEN_ASM_COMMENT("mybegin4");
traits.loadLhs(&blA[3*LhsProgress], A0);
traits.loadRhs(&blB[15*RhsProgress], B3);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B1,C1,B1);
traits.madd(A0,B2,C2,B2);
traits.madd(A0,B3,C3,B3);
@@ -904,26 +905,26 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsPacket A0;
RhsPacket B_0, B1;
RhsPacket B0, B1;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B1,C1,B1);
}
else
{
LhsPacket A0;
RhsPacket B_0, B1, B2, B3;
RhsPacket B0, B1, B2, B3;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.loadRhs(&blB[1*RhsProgress], B1);
traits.loadRhs(&blB[2*RhsProgress], B2);
traits.loadRhs(&blB[3*RhsProgress], B3);
traits.madd(A0,B_0,C0,B_0);
traits.madd(A0,B0,C0,B0);
traits.madd(A0,B1,C1,B1);
traits.madd(A0,B2,C2,B2);
traits.madd(A0,B3,C3,B3);
@@ -970,26 +971,26 @@ EIGEN_ASM_COMMENT("mybegin4");
if(nr==2)
{
LhsScalar A0;
RhsScalar B_0, B1;
RhsScalar B0, B1;
A0 = blA[k];
B_0 = blB[0];
B0 = blB[0];
B1 = blB[1];
MADD(cj,A0,B_0,C0,B_0);
MADD(cj,A0,B0,C0,B0);
MADD(cj,A0,B1,C1,B1);
}
else
{
LhsScalar A0;
RhsScalar B_0, B1, B2, B3;
RhsScalar B0, B1, B2, B3;
A0 = blA[k];
B_0 = blB[0];
B0 = blB[0];
B1 = blB[1];
B2 = blB[2];
B3 = blB[3];
MADD(cj,A0,B_0,C0,B_0);
MADD(cj,A0,B0,C0,B0);
MADD(cj,A0,B1,C1,B1);
MADD(cj,A0,B2,C2,B2);
MADD(cj,A0,B3,C3,B3);
@@ -1026,14 +1027,14 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index k=0; k<depth; k++)
{
LhsPacket A0, A1;
RhsPacket B_0;
RhsPacket B0;
RhsPacket T0;
traits.loadLhs(&blA[0*LhsProgress], A0);
traits.loadLhs(&blA[1*LhsProgress], A1);
traits.loadRhs(&blB[0*RhsProgress], B_0);
traits.madd(A0,B_0,C0,T0);
traits.madd(A1,B_0,C4,B_0);
traits.loadRhs(&blB[0*RhsProgress], B0);
traits.madd(A0,B0,C0,T0);
traits.madd(A1,B0,C4,B0);
blB += RhsProgress;
blA += 2*LhsProgress;
@@ -1065,10 +1066,10 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index k=0; k<depth; k++)
{
LhsPacket A0;
RhsPacket B_0;
RhsPacket B0;
traits.loadLhs(blA, A0);
traits.loadRhs(blB, B_0);
traits.madd(A0, B_0, C0, B_0);
traits.loadRhs(blB, B0);
traits.madd(A0, B0, C0, B0);
blB += RhsProgress;
blA += LhsProgress;
}
@@ -1090,8 +1091,8 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index k=0; k<depth; k++)
{
LhsScalar A0 = blA[k];
RhsScalar B_0 = blB[k];
MADD(cj, A0, B_0, C0, B_0);
RhsScalar B0 = blB[k];
MADD(cj, A0, B0, C0, B0);
}
res[(j2+0)*resStride + i] += alpha*C0;
}
@@ -1102,7 +1103,7 @@ EIGEN_ASM_COMMENT("mybegin4");
#undef CJMADD
// pack a block of the lhs
// The traversal is as follow (mr==4):
// The travesal is as follow (mr==4):
// 0 4 8 12 ...
// 1 5 9 13 ...
// 2 6 10 14 ...
@@ -1118,15 +1119,11 @@ EIGEN_ASM_COMMENT("mybegin4");
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct gemm_pack_lhs
{
EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
Index stride=0, Index offset=0)
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
// enum { PacketSize = packet_traits<Scalar>::size };
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
@@ -1134,44 +1131,9 @@ struct gemm_pack_lhs
for(Index i=0; i<peeled_mc; i+=Pack1)
{
if(PanelMode) count += Pack1 * offset;
if(StorageOrder==ColMajor)
{
for(Index k=0; k<depth; k++)
{
Packet A, B, C, D;
if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
}
}
else
{
for(Index k=0; k<depth; k++)
{
// TODO add a vectorized transpose here
Index w=0;
for(; w<Pack1-3; w+=4)
{
Scalar a(cj(lhs(i+w+0, k))),
b(cj(lhs(i+w+1, k))),
c(cj(lhs(i+w+2, k))),
d(cj(lhs(i+w+3, k)));
blockA[count++] = a;
blockA[count++] = b;
blockA[count++] = c;
blockA[count++] = d;
}
if(Pack1%4)
for(;w<Pack1;++w)
blockA[count++] = cj(lhs(i+w, k));
}
}
for(Index k=0; k<depth; k++)
for(Index w=0; w<Pack1; w++)
blockA[count++] = cj(lhs(i+w, k));
if(PanelMode) count += Pack1 * (stride-offset-depth);
}
if(rows-peeled_mc>=Pack2)
@@ -1205,10 +1167,9 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
@@ -1253,10 +1214,9 @@ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = packet_traits<Scalar>::size };
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;

View File

@@ -78,7 +78,7 @@ static void run(Index rows, Index cols, Index depth,
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
//Index nc = blocking.nc(); // cache block size along the N direction
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -94,16 +94,15 @@ static void run(Index rows, Index cols, Index depth,
std::size_t sizeA = kc*mc;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, 0);
ei_declare_aligned_stack_constructed_variable(RhsScalar, w, sizeW, 0);
LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, sizeA);
RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW);
RhsScalar* blockB = blocking.blockB();
eigen_internal_assert(blockB!=0);
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
for(Index k=0; k<depth; k+=kc)
{
const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
const Index actual_kc = std::min(k+kc,depth)-k; // => rows of B', and cols of the A'
// In order to reduce the chance that a thread has to wait for the other,
// let's start by packing A'.
@@ -140,7 +139,7 @@ static void run(Index rows, Index cols, Index depth,
// Then keep going as usual with the remaining A'
for(Index i=mc; i<rows; i+=mc)
{
const Index actual_mc = (std::min)(i+mc,rows)-i;
const Index actual_mc = std::min(i+mc,rows)-i;
// pack A_i,k to A'
pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
@@ -155,6 +154,9 @@ static void run(Index rows, Index cols, Index depth,
#pragma omp atomic
--(info[j].users);
}
ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
ei_aligned_stack_delete(RhsScalar, w, sizeW);
}
else
#endif // EIGEN_HAS_OPENMP
@@ -165,16 +167,15 @@ static void run(Index rows, Index cols, Index depth,
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockW, sizeW, blocking.blockW());
LhsScalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(LhsScalar, sizeA) : blocking.blockA();
RhsScalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(RhsScalar, sizeB) : blocking.blockB();
RhsScalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(RhsScalar, sizeW) : blocking.blockW();
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
// (==GEMM_VAR1)
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
const Index actual_kc = std::min(k2+kc,depth)-k2;
// OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
// => Pack rhs's panel into a sequential chunk of memory (L2 caching)
@@ -187,7 +188,7 @@ static void run(Index rows, Index cols, Index depth,
// (==GEPP_VAR1)
for(Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,rows)-i2;
const Index actual_mc = std::min(i2+mc,rows)-i2;
// We pack the lhs's block into a sequential chunk of memory (L1 caching)
// Note that this block will be read a very high number of times, which is equal to the number of
@@ -199,6 +200,10 @@ static void run(Index rows, Index cols, Index depth,
}
}
if(blocking.blockA()==0) ei_aligned_stack_delete(LhsScalar, blockA, sizeA);
if(blocking.blockB()==0) ei_aligned_stack_delete(RhsScalar, blockB, sizeB);
if(blocking.blockW()==0) ei_aligned_stack_delete(RhsScalar, blockW, sizeW);
}
}
@@ -412,8 +417,8 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);

View File

@@ -42,14 +42,14 @@ struct tribb_kernel;
template <typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder, int UpLo, int Version = Specialized>
int ResStorageOrder, int UpLo>
struct general_matrix_matrix_triangular_product;
// as usual if the result is row major => we transpose the product
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
{
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
@@ -63,8 +63,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
};
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
@@ -83,10 +83,10 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
if(mc > Traits::nr)
mc = (mc/Traits::nr)*Traits::nr;
LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*size;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0);
RhsScalar* allocatedBlockB = ei_aligned_stack_new(RhsScalar, sizeB);
RhsScalar* blockB = allocatedBlockB + sizeW;
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -96,14 +96,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
const Index actual_kc = std::min(k2+kc,depth)-k2;
// note that the actual rhs is the transpose/adjoint of mat
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size);
for(Index i2=0; i2<size; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,size)-i2;
const Index actual_mc = std::min(i2+mc,size)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
@@ -112,7 +112,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
// 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
// 3 - after the diagonal => processed with gebp or skipped
if (UpLo==Lower)
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha,
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha,
-1, -1, 0, 0, allocatedBlockB);
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
@@ -120,11 +120,13 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
if (UpLo==Upper)
{
Index j2 = i2+actual_mc;
gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha,
gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha,
-1, -1, 0, 0, allocatedBlockB);
}
}
}
ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
ei_aligned_stack_delete(RhsScalar, allocatedBlockB, sizeB);
}
};
@@ -201,13 +203,13 @@ TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs());
typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs());
typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());

View File

@@ -1,142 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Level 3 BLAS SYRK/HERK implementation.
********************************************************************************
*/
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
namespace internal {
// Generic (non-BLAS) rank-update entry point.
// It simply inherits from general_matrix_matrix_triangular_product using the
// BuiltIn version, passing the same scalar type, storage order (AStorageOrder)
// and conjugation flag (ConjugateA) for both the lhs and the rhs operand.
// Scalar-specific specializations of this struct are generated by the
// EIGEN_MKL_RANKUPDATE_R / EIGEN_MKL_RANKUPDATE_C macros in this file.
template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo>
struct general_matrix_matrix_rankupdate :
general_matrix_matrix_triangular_product<
Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
// try to go to BLAS specialization
//
// EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) defines the partial specialization of
// general_matrix_matrix_triangular_product for the given Scalar (used for both
// operands), a ColMajor result and the `Specialized` version tag.
// Its run() dispatches at run time:
//   - if the lhs and rhs pointers are equal, it forwards to
//     general_matrix_matrix_rankupdate (the SYRK/HERK path);
//   - otherwise it forwards to the BuiltIn triangular product.
// NOTE(review): the test is a pure pointer comparison (lhs==rhs); storage orders,
// strides and conjugation flags of the two operands are not compared here --
// confirm that callers only pass identical pointers for genuine rank updates.
#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
{ \
if (lhs==rhs) { \
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
} else { \
general_matrix_matrix_triangular_product<Index, \
Scalar, LhsStorageOrder, ConjugateLhs, \
Scalar, RhsStorageOrder, ConjugateRhs, \
ColMajor, UpLo, BuiltIn> \
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
} \
} \
};
// Only the real scalar types are routed through the dispatcher above; the
// complex instantiations are deliberately left commented out.
EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
// SYRK for float/double
//
// EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) specializes
// general_matrix_matrix_rankupdate for the real scalar EIGTYPE and a ColMajor
// result, implementing run() with a single call to the BLAS routine MKLFUNC.
//   EIGTYPE : Eigen-side scalar type of A and of the result.
//   MKLTYPE : MKL-side scalar type used for alpha_ and beta_.
//   MKLFUNC : the ?syrk routine to call.
// Inside run():
//   - uplo is 'L' when UpLo has the Lower bit set, 'U' otherwise;
//   - trans is 'T' when A is RowMajor, 'N' otherwise;
//   - beta_ is set to 1, so the update is added to the existing content of res;
//   - only lhs/lhsStride are passed to MKLFUNC; rhs and rhsStride are unused;
//   - the enum values LowUp and conjA are computed but not used in run().
#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \
IsLower = (UpLo&Lower) == Lower, \
LowUp = IsLower ? Lower : Upper, \
conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
}; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
{ \
/* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
\
MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
MKLTYPE alpha_, beta_; \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
} \
};
// HERK for complex data
//
// EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) specializes
// general_matrix_matrix_rankupdate for the complex scalar EIGTYPE and a
// ColMajor result, implementing run() with a call to the BLAS routine MKLFUNC.
//   EIGTYPE : Eigen-side complex scalar type.
//   MKLTYPE : MKL-side complex type the data pointers are cast to.
//   RTYPE   : real type used for alpha_ and beta_.
//   MKLFUNC : the ?herk routine to call.
// Inside run():
//   - uplo is 'L' when UpLo has the Lower bit set, 'U' otherwise;
//   - trans is 'C' when A is RowMajor, 'N' otherwise;
//   - alpha_ keeps only alpha.real() (the imaginary part of alpha is dropped)
//     and beta_ is 1, so the update is added to the existing content of res;
//   - when conjA is set (ColMajor with ConjugateA, or RowMajor without it),
//     A is first copied into the temporary `a` as its conjugate, and lda/a_ptr
//     are redirected to that temporary; otherwise lhs is used directly;
//   - rhs and rhsStride are unused.
// NOTE(review): no instantiation of this macro is active in this file (the two
// uses below it are commented out).
#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \
IsLower = (UpLo&Lower) == Lower, \
LowUp = IsLower ? Lower : Upper, \
conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
}; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
{ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
\
MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
RTYPE alpha_, beta_; \
const EIGTYPE* a_ptr; \
\
/* Set alpha_ & beta_ */ \
/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
alpha_ = alpha.real(); \
beta_ = 1.0; \
/* Copy with conjugation in some cases*/ \
MatrixType a; \
if (conjA) { \
Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
a = mapA.conjugate(); \
lda = a.outerStride(); \
a_ptr = a.data(); \
} else a_ptr=lhs; \
MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
} \
};
// Active rank-update kernels: real double and float through ?syrk.
EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk)
// The complex (?herk) kernels are disabled.
// NOTE(review): the cherk line passes `double` as RTYPE although it operates on
// single-precision complex data (MKL_Complex8); presumably this should be
// `float` -- verify against the cherk prototype before re-enabling.
//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk)
} // end namespace internal
#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H

View File

@@ -1,114 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* General matrix-matrix product functionality based on ?GEMM.
********************************************************************************
*/
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
namespace internal {
/**********************************************************************
* This file implements general matrix-matrix multiplication using BLAS
* gemm function via partial specialization of
* general_matrix_matrix_product::run(..) method for float, double,
* std::complex<float> and std::complex<double> types
**********************************************************************/
// gemm specialization
//
// GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) defines the partial
// specialization of general_matrix_matrix_product for scalar EIGTYPE (both
// operands) and a ColMajor result, whose run() performs one BLAS gemm call.
//   EIGTYPE   : Eigen-side scalar type.
//   EIGPREFIX : suffix pasted onto `MatrixX` to name the dynamic matrix type
//               used for the temporaries (MatrixX##EIGPREFIX).
//   MKLTYPE   : MKL-side scalar type the data pointers are cast to.
//   MKLPREFIX : prefix pasted onto `gemm` to name the routine (MKLPREFIX##gemm).
// Inside run():
//   - transa/transb are 'N' for ColMajor operands; for RowMajor operands they
//     are 'C' when the operand must be conjugated and 'T' otherwise;
//   - a ColMajor operand that must be conjugated cannot be expressed through
//     the 'N' flag, so it is copied as its conjugate into a_tmp / b_tmp and the
//     pointer and leading dimension are redirected to that temporary;
//   - beta_ is set to one, so the product scaled by alpha is accumulated into
//     the existing content of res (standard gemm semantics);
//   - the `blocking` and `info` parameters are not used by this implementation,
//     and the `using std::conj;` declaration is not referenced in the body.
#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
template< \
typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
static void run(Index rows, Index cols, Index depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha, \
level3_blocking<EIGTYPE, EIGTYPE>& blocking, \
GemmParallelInfo<Index>* info = 0) \
{ \
using std::conj; \
\
char transa, transb; \
MKL_INT m, n, k, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX a_tmp, b_tmp; \
EIGTYPE myone(1);\
\
/* Set transpose options */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\
/* Set m, n, k */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
k = (MKL_INT)depth; \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Set lda, ldb, ldc */ \
lda = (MKL_INT)lhsStride; \
ldb = (MKL_INT)rhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Set a, b, c */ \
if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else a = _lhs; \
\
if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} else b = _rhs; \
\
MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
}};
// One specialization per supported scalar type: double, float, and the two
// complex types (dcomplex, scomplex).
GEMM_SPECIALIZATION(double, d, double, d)
GEMM_SPECIALIZATION(float, f, float, s)
GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c)
} // end of namespace internal
#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H

View File

@@ -40,8 +40,8 @@ namespace internal {
* |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
@@ -99,7 +99,7 @@ EIGEN_DONT_INLINE static void run(
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type.
Index alignedStart = internal::first_aligned(res,size);
Index alignedStart = first_aligned(res,size);
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@@ -109,7 +109,7 @@ EIGEN_DONT_INLINE static void run(
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = internal::first_aligned(lhs,size);
const Index lhsAlignmentOffset = first_aligned(lhs,size);
// find how many columns do we have to skip to be aligned with the result (if possible)
Index skipColumns = 0;
@@ -134,7 +134,7 @@ EIGEN_DONT_INLINE static void run(
}
else
{
skipColumns = (std::min)(skipColumns,cols);
skipColumns = std::min(skipColumns,cols);
// note that the skipped columns are processed later.
}
@@ -296,8 +296,8 @@ EIGEN_DONT_INLINE static void run(
* - alpha is always a complex (or converted to a complex)
* - no vectorization
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
@@ -351,7 +351,7 @@ EIGEN_DONT_INLINE static void run(
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type
// if that's not the case then vectorization is discarded, see below.
Index alignedStart = internal::first_aligned(rhs, depth);
Index alignedStart = first_aligned(rhs, depth);
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@@ -361,7 +361,7 @@ EIGEN_DONT_INLINE static void run(
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth);
const Index lhsAlignmentOffset = first_aligned(lhs,depth);
// find how many rows do we have to skip to be aligned with rhs (if possible)
Index skipRows = 0;
@@ -386,7 +386,7 @@ EIGEN_DONT_INLINE static void run(
}
else
{
skipRows = (std::min)(skipRows,Index(rows));
skipRows = std::min(skipRows,Index(rows));
// note that the skipped rows are processed later.
}
eigen_internal_assert( alignmentPattern==NoneAligned

View File

@@ -1,127 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* General matrix-vector product functionality based on ?GEMV.
********************************************************************************
*/
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
namespace internal {
/**********************************************************************
* This file implements general matrix-vector multiplication using BLAS
* gemv function via partial specialization of
* general_matrix_vector_product::run(..) method for float, double,
* std::complex<float> and std::complex<double> types
**********************************************************************/
// Dispatch layer for the ?GEMV-based path. The primary template only inherits
// the BuiltIn version of general_matrix_vector_product, so every scalar
// combination that has no dedicated partial specialization keeps using it.
template<typename Index,
         typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
         typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product_gemv
  : public general_matrix_vector_product<Index, LhsScalar, LhsStorageOrder, ConjugateLhs,
                                         RhsScalar, ConjugateRhs, BuiltIn>
{
};
// EIGEN_MKL_GEMV_SPECIALIZE(Scalar)
// Declares the `Specialized` versions of general_matrix_vector_product for the
// case where both operands have scalar type `Scalar`:
//  - ColMajor lhs: forwards to the BuiltIn implementation when ConjugateLhs is
//    set, otherwise to general_matrix_vector_product_gemv;
//  - RowMajor lhs: always forwards to general_matrix_vector_product_gemv.
// The last line of the macro intentionally carries no trailing backslash: the
// definition must end there instead of splicing in whatever line follows it.
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
  static EIGEN_DONT_INLINE void run( \
    Index rows, Index cols, \
    const Scalar* lhs, Index lhsStride, \
    const Scalar* rhs, Index rhsIncr, \
    Scalar* res, Index resIncr, Scalar alpha) \
  { \
    /* A conjugated column-major lhs is left to the BuiltIn kernel. */ \
    if (ConjugateLhs) { \
      general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
        rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
    } else { \
      general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
        rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
    } \
  } \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
  static EIGEN_DONT_INLINE void run( \
    Index rows, Index cols, \
    const Scalar* lhs, Index lhsStride, \
    const Scalar* rhs, Index rhsIncr, \
    Scalar* res, Index resIncr, Scalar alpha) \
  { \
    general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
  } \
};

EIGEN_MKL_GEMV_SPECIALIZE(double)
EIGEN_MKL_GEMV_SPECIALIZE(float)
EIGEN_MKL_GEMV_SPECIALIZE(dcomplex)
EIGEN_MKL_GEMV_SPECIALIZE(scomplex)
// EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE, MKLTYPE, MKLPREFIX)
// Partially specializes general_matrix_vector_product_gemv for EIGTYPE on both
// sides and implements run() through one call to MKLPREFIX##gemv.
//  - EIGTYPE   : Eigen-side scalar type of lhs, rhs and res.
//  - MKLTYPE   : type the scalars are converted to and the pointers are cast to.
//  - MKLPREFIX : prefix pasted in front of `gemv` to pick the routine.
#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \
  typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector; \
  \
  static EIGEN_DONT_INLINE void run( \
    Index rows, Index cols, \
    const EIGTYPE* lhs, Index lhsStride, \
    const EIGTYPE* rhs, Index rhsIncr, \
    EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
  { \
    /* A row-major lhs is handed over with its two dimensions exchanged. */ \
    MKL_INT blasRows = rows; \
    MKL_INT blasCols = cols; \
    if (LhsStorageOrder==RowMajor) { \
      blasRows = cols; \
      blasCols = rows; \
    } \
    MKL_INT lhsLd = lhsStride; \
    MKL_INT xInc = rhsIncr; \
    MKL_INT yInc = resIncr; \
    \
    /* 'N' for a column-major lhs, otherwise 'C' or 'T' depending on ConjugateLhs. */ \
    char op; \
    if (LhsStorageOrder==ColMajor) op = 'N'; \
    else if (ConjugateLhs)         op = 'C'; \
    else                           op = 'T'; \
    \
    const EIGTYPE one(1); \
    MKLTYPE mklAlpha, mklBeta; \
    assign_scalar_eig2mkl(mklAlpha, alpha); \
    assign_scalar_eig2mkl(mklBeta, one); \
    \
    /* When the rhs has to be conjugated, a contiguous conjugated copy is used */ \
    /* instead of the caller's data; it must stay alive until after the call. */ \
    GEMVVector conjRhs; \
    const EIGTYPE* xData = rhs; \
    if (ConjugateRhs) { \
      Map<const GEMVVector, 0, InnerStride<> > stridedRhs(rhs,cols,1,InnerStride<>(xInc)); \
      conjRhs = stridedRhs.conjugate(); \
      xData = conjRhs.data(); \
      xInc = 1; \
    } \
    \
    MKLPREFIX##gemv(&op, &blasRows, &blasCols, &mklAlpha, (const MKLTYPE*)lhs, &lhsLd, (const MKLTYPE*)xData, &xInc, &mklBeta, (MKLTYPE*)res, &yInc); \
  } \
};

EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d)
EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s)
EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z)
EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c)
} // end namespace internal
#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H

View File

@@ -85,9 +85,7 @@ template<typename Index> struct GemmParallelInfo
template<bool Condition, typename Functor, typename Index>
void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
{
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
#ifndef EIGEN_HAS_OPENMP
// FIXME the transpose variable is only needed to properly split
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole

View File

@@ -114,7 +114,7 @@ struct symm_pack_rhs
}
// second part: diagonal block
for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
for(Index j2=k2; j2<std::min(k2+rows,packet_cols); j2+=nr)
{
// again we can split vertically in three different parts (transpose, symmetric, normal)
// transpose
@@ -179,7 +179,7 @@ struct symm_pack_rhs
for(Index j2=packet_cols; j2<cols; ++j2)
{
// transpose
Index half = (std::min)(end_k,j2);
Index half = std::min(end_k,j2);
for(Index k=k2; k<half; k++)
{
blockB[count] = conj(rhs(j2,k));
@@ -261,12 +261,12 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
// kc must not exceed mc
kc = (std::min)(kc,mc);
kc = std::min(kc,mc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + sizeW;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
@@ -276,7 +276,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
for(Index k2=0; k2<size; k2+=kc)
{
const Index actual_kc = (std::min)(k2+kc,size)-k2;
const Index actual_kc = std::min(k2+kc,size)-k2;
// we have selected one row panel of rhs and one column panel of lhs
// pack rhs's panel into a sequential chunk of memory
@@ -289,7 +289,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
// 3 - the panel below the diagonal block => generic packed copy
for(Index i2=0; i2<k2; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,k2)-i2;
const Index actual_mc = std::min(i2+mc,k2)-i2;
// transposed packed copy
pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
@@ -297,7 +297,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
}
// the block diagonal
{
const Index actual_mc = (std::min)(k2+kc,size)-k2;
const Index actual_mc = std::min(k2+kc,size)-k2;
// symmetric packed copy
pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
@@ -306,13 +306,16 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
for(Index i2=k2+kc; i2<size; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,size)-i2;
const Index actual_mc = std::min(i2+mc,size)-i2;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};
@@ -340,10 +343,11 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + sizeW;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
@@ -352,19 +356,22 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
for(Index k2=0; k2<size; k2+=kc)
{
const Index actual_kc = (std::min)(k2+kc,size)-k2;
const Index actual_kc = std::min(k2+kc,size)-k2;
pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
// => GEPP
for(Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,rows)-i2;
const Index actual_mc = std::min(i2+mc,rows)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};
@@ -400,8 +407,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);

View File

@@ -1,291 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
namespace internal {
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
// EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX)
// Specializes product_selfadjoint_matrix for EIGTYPE in the "self-adjoint matrix
// on the left" configuration and implements run() with one call to
// MKLPREFIX##symm using side='L'.
//  - A RowMajor lhs is passed unchanged, only uplo becomes 'U' instead of 'L'.
//  - A RowMajor rhs is first evaluated into a MatrixX##EIGPREFIX temporary via
//    adjoint(); a ColMajor rhs is passed directly.
#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{ \
  static EIGEN_DONT_INLINE void run( \
    Index rows, Index cols, \
    const EIGTYPE* _lhs, Index lhsStride, \
    const EIGTYPE* _rhs, Index rhsStride, \
    EIGTYPE* res, Index resStride, \
    EIGTYPE alpha) \
  { \
    /* Flags and sizes in the form expected by the symm call. */ \
    char sideFlag = 'L'; \
    char triFlag = 'L'; \
    if (LhsStorageOrder==RowMajor) triFlag = 'U'; \
    MKL_INT nbRows = (MKL_INT)rows; \
    MKL_INT nbCols = (MKL_INT)cols; \
    MKL_INT ldLhs = (MKL_INT)lhsStride; \
    MKL_INT ldRhs = (MKL_INT)rhsStride; \
    MKL_INT ldRes = (MKL_INT)resStride; \
    \
    /* Scalars: alpha as given, beta fixed to one. */ \
    EIGTYPE one(1); \
    MKLTYPE mklAlpha, mklBeta; \
    assign_scalar_eig2mkl(mklAlpha, alpha); \
    assign_scalar_eig2mkl(mklBeta, one); \
    \
    /* Right operand: used in place unless it is row-major, in which case a */ \
    /* temporary copy (which must outlive the call) is built from its adjoint. */ \
    MatrixX##EIGPREFIX rhsCopy; \
    const EIGTYPE* rhsData = _rhs; \
    if (RhsStorageOrder==RowMajor) { \
      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhsView(_rhs,nbCols,nbRows,OuterStride<>(rhsStride)); \
      rhsCopy = rhsView.adjoint(); \
      rhsData = rhsCopy.data(); \
      ldRhs = rhsCopy.outerStride(); \
    } \
    \
    MKLPREFIX##symm(&sideFlag, &triFlag, &nbRows, &nbCols, &mklAlpha, (const MKLTYPE*)_lhs, &ldLhs, (const MKLTYPE*)rhsData, &ldRhs, &mklBeta, (MKLTYPE*)res, &ldRes); \
  } \
};
// EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX)
// Specializes product_selfadjoint_matrix for scalar type EIGTYPE in the
// "self-adjoint matrix on the left" configuration and implements run() with a
// single call to MKLPREFIX##hemm using side='L'.
//  - MKLTYPE is the type the scalars are converted to and the data pointers are
//    cast to for the call; EIGPREFIX selects the MatrixX##EIGPREFIX temporary.
//  - The lhs is copied into a conjugated temporary (a_tmp) when it is ColMajor
//    with ConjugateLhs, or RowMajor without ConjugateLhs; otherwise it is passed
//    as is. A RowMajor lhs additionally switches uplo from 'L' to 'U'.
//  - The rhs is passed directly only when it is ColMajor and not conjugated; in
//    every other case it is first evaluated (conjugate / adjoint / transpose)
//    into the temporary b_tmp.
//  - beta_ is set from myone(1); presumably this makes ?hemm accumulate into res
//    (standard BLAS C := alpha*A*B + beta*C) -- confirm against the MKL reference.
// The four lines after the macro instantiate the left-side SYMM/HEMM
// specializations for double, float, dcomplex and scomplex.
#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \
{ \
char side='L', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
EIGTYPE myone(1); \
\
/* No transpose flag is passed to hemm; only side and uplo (declared above) */ \
/* Set m and n from rows and cols (no k is used here) */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
\
/* Set alpha_ from alpha and beta_ from the constant one */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Set lda, ldb, ldc from the caller's strides (lda/ldb may be replaced below) */ \
lda = (MKL_INT)lhsStride; \
ldb = (MKL_INT)rhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Select the a and b operand pointers, going through a_tmp / b_tmp when a conjugated or transposed copy is needed */ \
if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else a = _lhs; \
if (LhsStorageOrder==RowMajor) uplo='U'; \
\
if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
b = _rhs; } \
else { \
if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \
} else \
if (ConjugateRhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \
} else { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.transpose(); \
} \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} \
\
MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
\
} \
};
EIGEN_MKL_SYMM_L(double, double, d, d)
EIGEN_MKL_SYMM_L(float, float, f, s)
EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c)
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
// EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX)
// Specializes product_selfadjoint_matrix for EIGTYPE in the "self-adjoint matrix
// on the right" configuration and implements run() with one call to
// MKLPREFIX##symm using side='R'.
//  - The self-adjoint operand is the rhs: it is passed as `a` with leading
//    dimension rhsStride, and a RowMajor rhs switches uplo from 'L' to 'U'.
//  - The general operand is the lhs: it is passed as `b`. A ColMajor lhs is used
//    in place with leading dimension lhsStride; a RowMajor lhs is viewed through
//    a Map that uses the lhs's own outer stride (lhsStride) and is evaluated
//    via adjoint() into the temporary b_tmp.
#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{ \
  static EIGEN_DONT_INLINE void run( \
    Index rows, Index cols, \
    const EIGTYPE* _lhs, Index lhsStride, \
    const EIGTYPE* _rhs, Index rhsStride, \
    EIGTYPE* res, Index resStride, \
    EIGTYPE alpha) \
  { \
    char side='R', uplo='L'; \
    MKL_INT m, n, lda, ldb, ldc; \
    const EIGTYPE *a, *b; \
    MKLTYPE alpha_, beta_; \
    MatrixX##EIGPREFIX b_tmp; \
    EIGTYPE myone(1); \
    \
    /* Set m, n */ \
    m = (MKL_INT)rows; \
    n = (MKL_INT)cols; \
    \
    /* Set alpha_ & beta_ */ \
    assign_scalar_eig2mkl(alpha_, alpha); \
    assign_scalar_eig2mkl(beta_, myone); \
    \
    /* Set lda, ldb, ldc: `a` is the rhs and `b` is the lhs */ \
    lda = (MKL_INT)rhsStride; \
    ldb = (MKL_INT)lhsStride; \
    ldc = (MKL_INT)resStride; \
    \
    /* Set a, b */ \
    if (RhsStorageOrder==RowMajor) uplo='U'; \
    a = _rhs; \
    \
    if (LhsStorageOrder==RowMajor) { \
      /* The view over _lhs must use the lhs stride, not the rhs one. */ \
      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
      b_tmp = lhs.adjoint(); \
      b = b_tmp.data(); \
      ldb = b_tmp.outerStride(); \
    } else b = _lhs; \
    \
    MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
  } \
};
// EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX)
// Specializes product_selfadjoint_matrix for scalar type EIGTYPE in the
// "self-adjoint matrix on the right" configuration and implements run() with a
// single call to MKLPREFIX##hemm using side='R'.
//  - Here `a` refers to the rhs (leading dimension from rhsStride) and `b` to
//    the lhs (leading dimension from lhsStride).
//  - The rhs is copied into a conjugated temporary (a_tmp) when it is ColMajor
//    with ConjugateRhs, or RowMajor without ConjugateRhs; otherwise it is passed
//    as is. A RowMajor rhs additionally switches uplo from 'L' to 'U'.
//  - The lhs is passed directly only when it is ColMajor and not conjugated; in
//    every other case it is first evaluated (conjugate / adjoint / transpose)
//    into the temporary b_tmp.
//  - beta_ is set from myone(1); presumably this makes ?hemm accumulate into res
//    (standard BLAS C := alpha*B*A + beta*C) -- confirm against the MKL reference.
// The four lines after the macro instantiate the right-side SYMM/HEMM
// specializations for double, float, dcomplex and scomplex.
#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
static EIGEN_DONT_INLINE void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \
{ \
char side='R', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
EIGTYPE myone(1); \
\
/* Set m and n from rows and cols (no k is used here) */ \
m = (MKL_INT)rows; \
n = (MKL_INT)cols; \
\
/* Set alpha_ from alpha and beta_ from the constant one */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\
/* Set lda (from the rhs), ldb (from the lhs) and ldc; lda/ldb may be replaced below */ \
lda = (MKL_INT)rhsStride; \
ldb = (MKL_INT)lhsStride; \
ldc = (MKL_INT)resStride; \
\
/* Select the a and b operand pointers, going through a_tmp / b_tmp when a conjugated or transposed copy is needed */ \
if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
a_tmp = rhs.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else a = _rhs; \
if (RhsStorageOrder==RowMajor) uplo='U'; \
\
if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
b = _lhs; } \
else { \
if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
b_tmp = lhs.conjugate(); \
} else \
if (ConjugateLhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
b_tmp = lhs.adjoint(); \
} else { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
b_tmp = lhs.transpose(); \
} \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
} \
\
MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
} \
};
EIGEN_MKL_SYMM_R(double, double, d, d)
EIGEN_MKL_SYMM_R(float, float, f, s)
EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c)
} // end namespace internal
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H

View File

@@ -32,15 +32,8 @@ namespace internal {
* the number of load/stores of the result by a factor 2 and to reduce
* the instruction dependency.
*/
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
struct selfadjoint_matrix_vector_product;
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
struct selfadjoint_matrix_vector_product
{
static EIGEN_DONT_INLINE void run(
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
static EIGEN_DONT_INLINE void product_selfadjoint_vector(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
@@ -69,15 +62,17 @@ static EIGEN_DONT_INLINE void run(
// FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
// if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
// this is because we need to extract packets
ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);
const Scalar* EIGEN_RESTRICT rhs = _rhs;
if (rhsIncr!=1)
{
Scalar* r = ei_aligned_stack_new(Scalar, size);
const Scalar* it = _rhs;
for (Index i=0; i<size; ++i, it+=rhsIncr)
rhs[i] = *it;
r[i] = *it;
rhs = r;
}
Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
Index bound = std::max(Index(0),size-8) & 0xfffffffe;
if (FirstTriangular)
bound = size - bound;
@@ -92,14 +87,14 @@ static EIGEN_DONT_INLINE void run(
Scalar t1 = cjAlpha * rhs[j+1];
Packet ptmp1 = pset1<Packet>(t1);
Scalar t2(0);
Scalar t2 = 0;
Packet ptmp2 = pset1<Packet>(t2);
Scalar t3(0);
Scalar t3 = 0;
Packet ptmp3 = pset1<Packet>(t3);
size_t starti = FirstTriangular ? 0 : j+2;
size_t endi = FirstTriangular ? j : size;
size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti);
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
@@ -155,7 +150,7 @@ static EIGEN_DONT_INLINE void run(
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
Scalar t1 = cjAlpha * rhs[j];
Scalar t2(0);
Scalar t2 = 0;
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
res[j] += cjd.pmul(internal::real(A0[j]), t1);
for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
@@ -165,8 +160,10 @@ static EIGEN_DONT_INLINE void run(
}
res[j] += alpha * t2;
}
if(rhsIncr!=1)
ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size);
}
};
} // end namespace internal
@@ -201,8 +198,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
@@ -214,33 +211,45 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
EvalToDest ? dest.data() : static_dest.data());
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
if(!EvalToDest)
bool freeDestPtr = false;
ResScalar* actualDestPtr;
if(EvalToDest)
actualDestPtr = dest.data();
else
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualDestPtr=static_dest.data())==0)
{
freeDestPtr = true;
actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
}
MappedDest(actualDestPtr, dest.size()) = dest;
}
if(!UseRhs)
bool freeRhsPtr = false;
RhsScalar* actualRhsPtr;
if(UseRhs)
actualRhsPtr = const_cast<RhsScalar*>(rhs.data());
else
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = rhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualRhsPtr=static_rhs.data())==0)
{
freeRhsPtr = true;
actualRhsPtr = ei_aligned_stack_new(RhsScalar,rhs.size());
}
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
}
internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
(
lhs.rows(), // size
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
@@ -250,7 +259,11 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
);
if(!EvalToDest)
{
dest = MappedDest(actualDestPtr, dest.size());
if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
}
if(freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, rhs.size());
}
};

View File

@@ -1,110 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
namespace internal {
/**********************************************************************
* This file implements selfadjoint matrix-vector multiplication using BLAS
**********************************************************************/
// Dispatch layer for the ?SYMV/?HEMV-based path. The primary template only
// inherits the BuiltIn version of selfadjoint_matrix_vector_product, so scalar
// types without a dedicated partial specialization keep using it.
template<typename Scalar, typename Index,
         int StorageOrder, int UpLo,
         bool ConjugateLhs, bool ConjugateRhs>
struct selfadjoint_matrix_vector_product_symv
  : public selfadjoint_matrix_vector_product<Scalar, Index, StorageOrder, UpLo,
                                             ConjugateLhs, ConjugateRhs, BuiltIn>
{
};
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
enum {\
IsColMajor = StorageOrder==ColMajor \
}; \
if (IsColMajor == ConjugateLhs) {\
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
} else {\
selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
}\
} \
}; \
EIGEN_MKL_SYMV_SPECIALIZE(double)
EIGEN_MKL_SYMV_SPECIALIZE(float)
EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
// EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE, MKLTYPE, MKLFUNC)
// Partially specializes selfadjoint_matrix_vector_product_symv for EIGTYPE and
// implements run() through one call to MKLFUNC (dsymv/ssymv/zhemv/chemv below).
//  - EIGTYPE : Eigen-side scalar type of lhs, _rhs and res.
//  - MKLTYPE : type the scalars are converted to and the pointers are cast to.
//  - MKLFUNC : routine that is called.
// The uplo flag is flipped for a row-major lhs (Lower -> 'U', Upper -> 'L').
// res is always addressed with increment 1, and beta is set from the constant
// one. The enumerator list has no trailing comma so it stays valid C++98/03.
#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
{ \
  typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector; \
  \
  static EIGEN_DONT_INLINE void run( \
    Index size, const EIGTYPE* lhs, Index lhsStride, \
    const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
  { \
    enum { \
      IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
      IsLower = UpLo == Lower ? 1 : 0 \
    }; \
    MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \
    MKLTYPE alpha_, beta_; \
    const EIGTYPE *x_ptr, myone(1); \
    char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
    assign_scalar_eig2mkl(alpha_, alpha); \
    assign_scalar_eig2mkl(beta_, myone); \
    /* x_tmp holds the conjugated rhs when needed and must outlive the call. */ \
    SYMVVector x_tmp; \
    if (ConjugateRhs) { \
      Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \
      x_tmp=map_x.conjugate(); \
      x_ptr=x_tmp.data(); \
      incx=1; \
    } else x_ptr=_rhs; \
    MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
  } \
};

EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv)
EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv)
EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
} // end namespace internal
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H

View File

@@ -72,7 +72,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
typedef internal::blas_traits<OtherType> OtherBlasTraits;
typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived());
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
@@ -81,17 +81,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1
};
internal::gemv_static_vector_if<Scalar,OtherType::SizeAtCompileTime,OtherType::MaxSizeAtCompileTime,!UseOtherDirectly> static_other;
ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(),
(UseOtherDirectly ? const_cast<Scalar*>(actualOther.data()) : static_other.data()));
if(!UseOtherDirectly)
bool freeOtherPtr = false;
Scalar* actualOtherPtr;
if(UseOtherDirectly)
actualOtherPtr = const_cast<Scalar*>(actualOther.data());
else
{
if((actualOtherPtr=static_other.data())==0)
{
freeOtherPtr = true;
actualOtherPtr = ei_aligned_stack_new(Scalar,other.size());
}
Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
}
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
(!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
if((!UseOtherDirectly) && freeOtherPtr) ei_aligned_stack_delete(Scalar, actualOtherPtr, other.size());
}
};
@@ -105,12 +115,12 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
typedef internal::blas_traits<OtherType> OtherBlasTraits;
typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived());
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
internal::general_matrix_matrix_triangular_product<Index,
Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,

View File

@@ -76,12 +76,12 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
typedef typename internal::remove_all<ActualUType>::type _ActualUType;
typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived());
const ActualUType actualU = UBlasTraits::extract(u.derived());
typedef internal::blas_traits<DerivedV> VBlasTraits;
typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
typedef typename internal::remove_all<ActualVType>::type _ActualVType;
typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived());
const ActualVType actualV = VBlasTraits::extract(v.derived());
// If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and
// vice versa, and take the complex conjugate of all coefficients and vector entries.

View File

@@ -58,16 +58,16 @@ template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder, int Version = Specialized>
int ResStorageOrder>
struct product_triangular_matrix_matrix;
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,RowMajor,Version>
RhsStorageOrder,ConjugateRhs,RowMajor>
{
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
@@ -91,43 +91,38 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
// implements col-major += alpha * op(triangular) * op(general)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
RhsStorageOrder,ConjugateRhs,ColMajor>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
};
static EIGEN_DONT_INLINE void run(
Index _rows, Index _cols, Index _depth,
Index rows, Index cols, Index depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha)
{
// strip zeros
Index diagSize = (std::min)(_rows,_depth);
Index rows = IsLower ? _rows : diagSize;
Index depth = IsLower ? diagSize : _depth;
Index cols = _cols;
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
};
Index kc = depth; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + sizeW;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
@@ -145,7 +140,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
IsLower ? k2>0 : k2<depth;
IsLower ? k2-=kc : k2+=kc)
{
Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
Index actual_kc = std::min(IsLower ? k2 : depth-k2, kc);
Index actual_k2 = IsLower ? k2-actual_kc : k2;
// align blocks with the end of the triangular part for trapezoidal lhs
@@ -158,11 +153,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols);
// the selected lhs's panel has to be split in three different parts:
// 1 - the part which is zero => skip it
// 1 - the part which is above the diagonal block => skip it
// 2 - the diagonal block => special kernel
// 3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP
// the block diagonal, if any:
// 3 - the panel below the diagonal block => GEPP
// the block diagonal, if any
if(IsLower || actual_k2<rows)
{
// for each small vertical panels of lhs
@@ -200,13 +194,13 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
}
}
}
// the part below (lower case) or above (upper case) the diagonal => GEPP
// the part below the diagonal => GEPP
{
Index start = IsLower ? k2 : 0;
Index end = IsLower ? rows : (std::min)(actual_k2,rows);
Index end = IsLower ? rows : std::min(actual_k2,rows);
for(Index i2=start; i2<end; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,end)-i2;
const Index actual_mc = std::min(i2+mc,end)-i2;
gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
@@ -214,49 +208,48 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
}
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
// delete[] allocatedBlockB;
}
};
// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
RhsStorageOrder,ConjugateRhs,ColMajor>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
};
static EIGEN_DONT_INLINE void run(
Index _rows, Index _cols, Index _depth,
Index rows, Index cols, Index depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha)
{
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
Index rows = _rows;
Index depth = IsLower ? _depth : diagSize;
Index cols = IsLower ? diagSize : _cols;
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
};
Index kc = depth; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar,sizeB);
Scalar* blockB = allocatedBlockB + sizeW;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
@@ -275,7 +268,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
IsLower ? k2<depth : k2>0;
IsLower ? k2+=kc : k2-=kc)
{
Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
Index actual_kc = std::min(IsLower ? depth-k2 : k2, kc);
Index actual_k2 = IsLower ? k2 : k2-actual_kc;
// align blocks with the end of the triangular part for trapezoidal rhs
@@ -286,7 +279,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
}
// remaining size
Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
Index rs = IsLower ? std::min(cols,actual_k2) : cols - k2;
// size of the triangular part
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
@@ -327,7 +320,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
for (Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = (std::min)(mc,rows-i2);
const Index actual_mc = std::min(mc,rows-i2);
pack_lhs(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
// triangular kernel
@@ -354,6 +347,9 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
-1, -1, 0, 0, allocatedBlockB);
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};
@@ -378,8 +374,8 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);

View File

@@ -1,305 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Triangular matrix * matrix product functionality based on ?TRMM.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
namespace internal {
// Primary template of the ?TRMM dispatch layer.
// It adds no members of its own: it simply inherits from the
// product_triangular_matrix_matrix instantiation tagged BuiltIn, so every parameter
// combination that has no dedicated specialization of this struct uses the inherited run().
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder>
struct product_triangular_matrix_matrix_trmm :
product_triangular_matrix_matrix<Scalar,Index,Mode,
LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
// try to go to BLAS specialization
#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { \
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
} \
};
EIGEN_MKL_TRMM_SPECIALIZE(double, true)
EIGEN_MKL_TRMM_SPECIALIZE(double, false)
EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
EIGEN_MKL_TRMM_SPECIALIZE(float, true)
EIGEN_MKL_TRMM_SPECIALIZE(float, false)
EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
// Implements col-major res += alpha * op(triangular lhs) * op(general rhs).
//   EIGTYPE   - Eigen-side scalar type
//   MKLTYPE   - matching MKL scalar type
//   EIGPREFIX - suffix of the MatrixX<suffix> typedef used for temporaries (d, cd, f, cf)
//   MKLPREFIX - prefix of the MKL routine name (d, z, s, c), pasted in front of `trmm`
// Three paths are taken depending on the shape of the triangular factor:
//   * not square and the single-thread heuristic below holds: the BuiltIn Eigen kernel;
//   * not square otherwise: the triangular view is expanded into a dense temporary and
//     multiplied through general_matrix_matrix_product;
//   * square: MKL ?trmm is run on a copy of the rhs and the copy is then added to res.
#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, int Mode, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
  enum { \
    IsLower = (Mode&Lower) == Lower, \
    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
    IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
    IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
    LowUp = IsLower ? Lower : Upper, \
    conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
  }; \
\
  static EIGEN_DONT_INLINE void run( \
    Index _rows, Index _cols, Index _depth, \
    const EIGTYPE* _lhs, Index lhsStride, \
    const EIGTYPE* _rhs, Index rhsStride, \
    EIGTYPE* res, Index resStride, \
    EIGTYPE alpha) \
  { \
    /* Strip the part of a trapezoidal lhs that lies outside the triangle. */ \
    Index diagSize = (std::min)(_rows,_depth); \
    Index rows = IsLower ? _rows : diagSize; \
    Index depth = IsLower ? diagSize : _depth; \
    Index cols = _cols; \
\
    typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
    typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
    /* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call GEMM */ \
    if (rows != depth) { \
\
      int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
\
      if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
        /* Single thread and a nearly square lhs: most likely no benefit from */ \
        /* TRMM or GEMM, so use the built-in Eigen implementation.            */ \
        product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
          LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
            _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
      } else { \
        /* Makes sense to call GEMM: expand the triangular view into a dense temporary first. */ \
        Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
        MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
        MKL_INT aStride = aa_tmp.outerStride(); \
        gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
        general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
          rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
      } \
      return; \
    } \
\
    /* Square case: MKL ?TRMM. */ \
    char side = 'L', transa, uplo, diag = 'N'; \
    EIGTYPE *b; \
    const EIGTYPE *a; \
    MKL_INT m, n, lda, ldb; \
    MKLTYPE alpha_; \
\
    /* Set alpha_ */ \
    assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
\
    /* Set m, n */ \
    m = (MKL_INT)diagSize; \
    n = (MKL_INT)cols; \
\
    /* Set trans: a row-major lhs is passed as the (conjugate-)transpose of a col-major one */ \
    transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
\
    /* Set b, ldb: the rhs is copied (conjugated if requested) because ?trmm works in place */ \
    Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
    MatrixX##EIGPREFIX b_tmp; \
\
    if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
    b = b_tmp.data(); \
    ldb = b_tmp.outerStride(); \
\
    /* Set uplo: swapped for a row-major lhs, consistently with transa above */ \
    uplo = IsLower ? 'L' : 'U'; \
    if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
\
    /* Set a, lda */ \
    Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
    MatrixLhs a_tmp; \
\
    if ((conjA!=0) || (SetDiag==0)) { \
      /* A copy is needed either to conjugate a col-major lhs or to overwrite the   */ \
      /* diagonal: diag is always 'N', so unit/zero diagonals are written explicitly. */ \
      if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
      if (IsZeroDiag) \
        a_tmp.diagonal().setZero(); \
      else if (IsUnitDiag) \
        a_tmp.diagonal().setOnes(); \
      a = a_tmp.data(); \
      lda = a_tmp.outerStride(); \
    } else { \
      a = _lhs; \
      lda = lhsStride; \
    } \
\
    /* call ?trmm */ \
    MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
\
    /* Add op(a_triangular)*b into res */ \
    Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
    res_tmp=res_tmp+b_tmp; \
  } \
};

EIGEN_MKL_TRMM_L(double, double, d, d)
EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMM_L(float, float, f, s)
EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
// Implements col-major res += alpha * op(general lhs) * op(triangular rhs).
//   EIGTYPE   - Eigen-side scalar type
//   MKLTYPE   - matching MKL scalar type
//   EIGPREFIX - suffix of the MatrixX<suffix> typedef used for temporaries (d, cd, f, cf)
//   MKLPREFIX - prefix of the MKL routine name (d, z, s, c), pasted in front of `trmm`
// Three paths are taken depending on the shape of the triangular factor:
//   * not square and the single-thread heuristic below holds: the BuiltIn Eigen kernel;
//   * not square otherwise: the triangular view is expanded into a dense temporary and
//     multiplied through general_matrix_matrix_product;
//   * square: MKL ?trmm is run on a copy of the lhs and the copy is then added to res.
#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, int Mode, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
  enum { \
    IsLower = (Mode&Lower) == Lower, \
    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
    IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
    IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
    LowUp = IsLower ? Lower : Upper, \
    conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
  }; \
\
  static EIGEN_DONT_INLINE void run( \
    Index _rows, Index _cols, Index _depth, \
    const EIGTYPE* _lhs, Index lhsStride, \
    const EIGTYPE* _rhs, Index rhsStride, \
    EIGTYPE* res, Index resStride, \
    EIGTYPE alpha) \
  { \
    /* Strip the part of a trapezoidal rhs that lies outside the triangle. */ \
    Index diagSize = (std::min)(_cols,_depth); \
    Index rows = _rows; \
    Index depth = IsLower ? _depth : diagSize; \
    Index cols = IsLower ? diagSize : _cols; \
\
    typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
    typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
    /* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call GEMM */ \
    if (cols != depth) { \
\
      int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
\
      if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
        /* Single thread and a nearly square rhs: most likely no benefit from */ \
        /* TRMM or GEMM, so use the built-in Eigen implementation.            */ \
        product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
          LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
            _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
      } else { \
        /* Makes sense to call GEMM: expand the triangular view into a dense temporary first. */ \
        Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
        MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
        MKL_INT aStride = aa_tmp.outerStride(); \
        gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
        general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
          rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, blocking); \
      } \
      return; \
    } \
\
    /* Square case: MKL ?TRMM. */ \
    char side = 'R', transa, uplo, diag = 'N'; \
    EIGTYPE *b; \
    const EIGTYPE *a; \
    MKL_INT m, n, lda, ldb; \
    MKLTYPE alpha_; \
\
    /* Set alpha_ */ \
    assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
\
    /* Set m, n */ \
    m = (MKL_INT)rows; \
    n = (MKL_INT)diagSize; \
\
    /* Set trans: a row-major rhs is passed as the (conjugate-)transpose of a col-major one */ \
    transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\
    /* Set b, ldb: the lhs is copied (conjugated if requested) because ?trmm works in place */ \
    Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
    MatrixX##EIGPREFIX b_tmp; \
\
    if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
    b = b_tmp.data(); \
    ldb = b_tmp.outerStride(); \
\
    /* Set uplo: swapped for a row-major rhs, consistently with transa above */ \
    uplo = IsLower ? 'L' : 'U'; \
    if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
\
    /* Set a, lda */ \
    Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
    MatrixRhs a_tmp; \
\
    if ((conjA!=0) || (SetDiag==0)) { \
      /* A copy is needed either to conjugate a col-major rhs or to overwrite the   */ \
      /* diagonal: diag is always 'N', so unit/zero diagonals are written explicitly. */ \
      if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
      if (IsZeroDiag) \
        a_tmp.diagonal().setZero(); \
      else if (IsUnitDiag) \
        a_tmp.diagonal().setOnes(); \
      a = a_tmp.data(); \
      lda = a_tmp.outerStride(); \
    } else { \
      a = _rhs; \
      lda = rhsStride; \
    } \
\
    /* call ?trmm */ \
    MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
\
    /* Add b*op(a_triangular) into res */ \
    Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
    res_tmp=res_tmp+b_tmp; \
  } \
};

EIGEN_MKL_TRMM_R(double, double, d, d)
EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMM_R(float, float, f, s)
EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
} // end namespace internal
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H

View File

@@ -27,25 +27,24 @@
namespace internal {
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized>
struct triangular_matrix_vector_product;
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
struct product_triangular_matrix_vector;
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
HasUnitDiag = (Mode & UnitDiag)==UnitDiag
};
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
static EIGEN_DONT_INLINE void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
{
EIGEN_UNUSED_VARIABLE(resIncr);
eigen_assert(resIncr==1);
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
Index rows = IsLower ? _rows : (std::min)(_rows,_cols);
Index cols = IsLower ? (std::min)(_rows,_cols) : _cols;
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
@@ -58,57 +57,48 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;
ResMap res(_res,rows);
for (Index pi=0; pi<size; pi+=PanelWidth)
for (Index pi=0; pi<cols; pi+=PanelWidth)
{
Index actualPanelWidth = (std::min)(PanelWidth, size-pi);
Index actualPanelWidth = std::min(PanelWidth, cols-pi);
for (Index k=0; k<actualPanelWidth; ++k)
{
Index i = pi + k;
Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi;
Index s = IsLower ? (HasUnitDiag ? i+1 : i ) : pi;
Index r = IsLower ? actualPanelWidth-k : k+1;
if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
if ((!HasUnitDiag) || (--r)>0)
res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);
if (HasUnitDiag)
res.coeffRef(i) += alpha * cjRhs.coeff(i);
}
Index r = IsLower ? rows - pi - actualPanelWidth : pi;
Index r = IsLower ? cols - pi - actualPanelWidth : pi;
if (r>0)
{
Index s = IsLower ? pi+actualPanelWidth : 0;
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
r, actualPanelWidth,
&lhs.coeffRef(s,pi), lhsStride,
&rhs.coeffRef(pi), rhsIncr,
&res.coeffRef(s), resIncr, alpha);
}
}
if((!IsLower) && cols>size)
{
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
rows, cols-size,
&lhs.coeffRef(0,size), lhsStride,
&rhs.coeffRef(size), rhsIncr,
_res, resIncr, alpha);
}
}
};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
HasUnitDiag = (Mode & UnitDiag)==UnitDiag
};
static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
static void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
{
eigen_assert(rhsIncr==1);
EIGEN_UNUSED_VARIABLE(rhsIncr);
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows,_cols);
Index rows = IsLower ? _rows : diagSize;
Index cols = IsLower ? diagSize : _cols;
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
@@ -121,15 +111,15 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;
ResMap res(_res,rows,InnerStride<>(resIncr));
for (Index pi=0; pi<diagSize; pi+=PanelWidth)
for (Index pi=0; pi<cols; pi+=PanelWidth)
{
Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);
Index actualPanelWidth = std::min(PanelWidth, cols-pi);
for (Index k=0; k<actualPanelWidth; ++k)
{
Index i = pi + k;
Index s = IsLower ? pi : ((HasUnitDiag||HasZeroDiag) ? i+1 : i);
Index s = IsLower ? pi : (HasUnitDiag ? i+1 : i);
Index r = IsLower ? k+1 : actualPanelWidth-k;
if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
if ((!HasUnitDiag) || (--r)>0)
res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum();
if (HasUnitDiag)
res.coeffRef(i) += alpha * cjRhs.coeff(i);
@@ -138,21 +128,13 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
if (r>0)
{
Index s = IsLower ? 0 : pi + actualPanelWidth;
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
actualPanelWidth, r,
&lhs.coeffRef(pi,s), lhsStride,
&rhs.coeffRef(s), rhsIncr,
&res.coeffRef(pi), resIncr, alpha);
}
}
if(IsLower && rows>diagSize)
{
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
rows-diagSize, cols,
&lhs.coeffRef(diagSize,0), lhsStride,
&rhs.coeffRef(0), rhsIncr,
&res.coeffRef(diagSize), resIncr, alpha);
}
}
};
@@ -203,8 +185,8 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
typedef TriangularProduct<(Mode & UnitDiag) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
Transpose<Dest> dstT(dst);
internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
@@ -232,8 +214,8 @@ template<> struct trmv_selector<ColMajor>
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
@@ -253,15 +235,23 @@ template<> struct trmv_selector<ColMajor>
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
evalToDest ? dest.data() : static_dest.data());
if(!evalToDest)
ResScalar* actualDestPtr;
bool freeDestPtr = false;
if (evalToDest)
{
actualDestPtr = dest.data();
}
else
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualDestPtr = static_dest.data())==0)
{
freeDestPtr = true;
actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
}
if(!alphaIsCompatible)
{
MappedDest(actualDestPtr, dest.size()).setZero();
@@ -271,7 +261,7 @@ template<> struct trmv_selector<ColMajor>
MappedDest(actualDestPtr, dest.size()) = dest;
}
internal::triangular_matrix_vector_product
internal::product_triangular_matrix_vector
<Index,Mode,
LhsScalar, LhsBlasTraits::NeedToConjugate,
RhsScalar, RhsBlasTraits::NeedToConjugate,
@@ -287,6 +277,7 @@ template<> struct trmv_selector<ColMajor>
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
}
}
};
@@ -319,19 +310,27 @@ template<> struct trmv_selector<RowMajor>
gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
if(!DirectlyUseRhs)
RhsScalar* actualRhsPtr;
bool freeRhsPtr = false;
if (DirectlyUseRhs)
{
actualRhsPtr = const_cast<RhsScalar*>(actualRhs.data());
}
else
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualRhsPtr = static_rhs.data())==0)
{
freeRhsPtr = true;
actualRhsPtr = ei_aligned_stack_new(RhsScalar, actualRhs.size());
}
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
internal::triangular_matrix_vector_product
internal::product_triangular_matrix_vector
<Index,Mode,
LhsScalar, LhsBlasTraits::NeedToConjugate,
RhsScalar, RhsBlasTraits::NeedToConjugate,
@@ -341,6 +340,8 @@ template<> struct trmv_selector<RowMajor>
actualRhsPtr,1,
dest.data(),dest.innerStride(),
actualAlpha);
if((!DirectlyUseRhs) && freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, prod.rhs().size());
}
};

View File

@@ -1,245 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Triangular matrix-vector product functionality based on ?TRMV.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
namespace internal {
/**********************************************************************
* This file implements triangular matrix-vector multiplication using BLAS
**********************************************************************/
// Dispatch point for the ?TRMV-based triangular matrix-vector kernels.
// This primary template is the generic fallback: it adds nothing of its own and
// simply inherits from the BuiltIn version of triangular_matrix_vector_product.
// The partial specializations generated by EIGEN_MKL_TRMV_CM / EIGEN_MKL_TRMV_RM
// in this file replace it for the scalar types they are instantiated with.
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
struct triangular_matrix_vector_product_trmv :
triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
// Helper: defines the "Specialized" triangular matrix-vector kernel for one
// scalar type and one storage order. Its run() forwards every argument,
// unchanged, to the matching triangular_matrix_vector_product_trmv.
#define EIGEN_MKL_TRMV_SPECIALIZE_ORDER(Scalar, Order) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,Order,Specialized> { \
  static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
                                    const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
    typedef triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,Order> TrmvImpl; \
    TrmvImpl::run(_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
  } \
};

// Emits both the column-major and the row-major specialization for Scalar.
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
EIGEN_MKL_TRMV_SPECIALIZE_ORDER(Scalar, ColMajor) \
EIGEN_MKL_TRMV_SPECIALIZE_ORDER(Scalar, RowMajor)

EIGEN_MKL_TRMV_SPECIALIZE(double)
EIGEN_MKL_TRMV_SPECIALIZE(float)
EIGEN_MKL_TRMV_SPECIALIZE(dcomplex)
EIGEN_MKL_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector
//
// EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) specializes
// triangular_matrix_vector_product_trmv for a column-major lhs of scalar EIGTYPE.
//   EIGTYPE   - Eigen-side scalar type
//   MKLTYPE   - scalar type the pointers are cast to for the MKL calls
//   EIGPREFIX - suffix appended to "VectorX" to name the temporary vector type
//   MKLPREFIX - prefix of the trmv/axpy/gemv routines to call
//
// run() proceeds in three steps:
//   1. ConjLhs or ZeroDiag modes are handed to the BuiltIn kernel and return.
//   2. The leading size x size triangle (size = min(_rows,_cols)) is multiplied
//      with ?trmv on a contiguous copy of the rhs, then accumulated into the
//      result with ?axpy (scaled by alpha).
//   3. If the matrix is not square, the remaining rectangular panel is
//      accumulated with ?gemv using beta = 1.
#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
  enum { \
    IsLower = (Mode&Lower) == Lower, \
    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
    IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
    IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
    LowUp = IsLower ? Lower : Upper \
  }; \
  static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
                                    const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
  { \
    /* Cases not handled through ?trmv here: delegate to the built-in kernel */ \
    if (ConjLhs || IsZeroDiag) { \
      triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
      return; \
    } \
    Index size = (std::min)(_rows,_cols); \
    Index rows = IsLower ? _rows : size; \
    Index cols = IsLower ? size : _cols; \
    \
    typedef VectorX##EIGPREFIX VectorRhs; \
    EIGTYPE *x, *y; \
    \
    /* Set x: contiguous (and, if requested, conjugated) copy of the rhs */ \
    Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
    VectorRhs x_tmp; \
    if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
    x = x_tmp.data(); \
    \
    /* Square part handling */ \
    \
    char trans, uplo, diag; \
    MKL_INT m, n, lda, incx, incy; \
    EIGTYPE const *a; \
    MKLTYPE alpha_, beta_; \
    assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
    assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
    \
    /* Set n and the strides */ \
    n = (MKL_INT)size; \
    lda = lhsStride; \
    incx = 1; \
    incy = resIncr; \
    \
    /* Set uplo, trans and diag */ \
    trans = 'N'; \
    uplo = IsLower ? 'L' : 'U'; \
    diag = IsUnitDiag ? 'U' : 'N'; \
    \
    /* call ?TRMV: x is passed as the in/out vector */ \
    MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
    \
    /* Add op(a_tr)rhs into res */ \
    MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
    /* Non-square case - doesn't fit to MKL ?TRMV. Handle the remaining panel with ?GEMV */ \
    if (size<(std::max)(rows,cols)) { \
      /* x_tmp was used as ?trmv's in/out vector: reload the original rhs */ \
      if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
      x = x_tmp.data(); \
      /* rows and cols cannot both exceed size, so exactly one branch applies */ \
      if (size<rows) { \
        /* extra rows below the triangle */ \
        y = _res + size*resIncr; \
        a = _lhs + size; \
        m = rows-size; \
        n = size; \
      } \
      else { \
        /* extra columns to the right of the triangle */ \
        x += size; \
        y = _res; \
        a = _lhs + size*lda; \
        m = size; \
        n = cols-size; \
      } \
      MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
    } \
  } \
};

EIGEN_MKL_TRMV_CM(double, double, d, d)
EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMV_CM(float, float, f, s)
EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c)
// implements row-major: res += alpha * op(triangular) * vector
//
// EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) specializes
// triangular_matrix_vector_product_trmv for a row-major lhs of scalar EIGTYPE.
//   EIGTYPE   - Eigen-side scalar type
//   MKLTYPE   - scalar type the pointers are cast to for the MKL calls
//   EIGPREFIX - suffix appended to "VectorX" to name the temporary vector type
//   MKLPREFIX - prefix of the trmv/axpy/gemv routines to call
//
// The row-major lhs is passed to MKL with a transposing op ('T', or 'C' when
// ConjLhs is set) and the opposite uplo flag. run() proceeds in three steps:
//   1. ZeroDiag modes are handed to the BuiltIn kernel and return.
//   2. The leading size x size triangle (size = min(_rows,_cols)) is multiplied
//      with ?trmv on a contiguous copy of the rhs, then accumulated into the
//      result with ?axpy (scaled by alpha).
//   3. If the matrix is not square, the remaining rectangular panel is
//      accumulated with ?gemv using beta = 1.
#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
  enum { \
    IsLower = (Mode&Lower) == Lower, \
    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
    IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
    IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
    LowUp = IsLower ? Lower : Upper \
  }; \
  static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
                                    const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
  { \
    /* Case not handled through ?trmv here: delegate to the built-in kernel */ \
    if (IsZeroDiag) { \
      triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
      return; \
    } \
    Index size = (std::min)(_rows,_cols); \
    Index rows = IsLower ? _rows : size; \
    Index cols = IsLower ? size : _cols; \
    \
    typedef VectorX##EIGPREFIX VectorRhs; \
    EIGTYPE *x, *y; \
    \
    /* Set x: contiguous (and, if requested, conjugated) copy of the rhs */ \
    Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
    VectorRhs x_tmp; \
    if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
    x = x_tmp.data(); \
    \
    /* Square part handling */ \
    \
    char trans, uplo, diag; \
    MKL_INT m, n, lda, incx, incy; \
    EIGTYPE const *a; \
    MKLTYPE alpha_, beta_; \
    assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
    assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
    \
    /* Set n and the strides */ \
    n = (MKL_INT)size; \
    lda = lhsStride; \
    incx = 1; \
    incy = resIncr; \
    \
    /* Set uplo, trans and diag: transposed op and swapped triangle for row-major storage */ \
    trans = ConjLhs ? 'C' : 'T'; \
    uplo = IsLower ? 'U' : 'L'; \
    diag = IsUnitDiag ? 'U' : 'N'; \
    \
    /* call ?TRMV: x is passed as the in/out vector */ \
    MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
    \
    /* Add op(a_tr)rhs into res */ \
    MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
    /* Non-square case - doesn't fit to MKL ?TRMV. Handle the remaining panel with ?GEMV */ \
    if (size<(std::max)(rows,cols)) { \
      /* x_tmp was used as ?trmv's in/out vector: reload the original rhs */ \
      if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
      x = x_tmp.data(); \
      /* rows and cols cannot both exceed size, so exactly one branch applies */ \
      if (size<rows) { \
        /* extra rows below the triangle */ \
        y = _res + size*resIncr; \
        a = _lhs + size*lda; \
        m = rows-size; \
        n = size; \
      } \
      else { \
        /* extra columns to the right of the triangle */ \
        x += size; \
        y = _res; \
        a = _lhs + size; \
        m = size; \
        n = cols-size; \
      } \
      /* dimensions are given as (n, m) because the panel is passed with a transposing op */ \
      MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
    } \
  } \
};

EIGEN_MKL_TRMV_RM(double, double, d, d)
EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMV_RM(float, float, f, s)
EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c)
} // end namespace internal
#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H

View File

@@ -70,48 +70,38 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + sizeW;
Scalar* blockW = allocatedBlockB;
conj_if<Conjugate> conj;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
// the goal here is to subdivise the Rhs panels such that we keep some cache
// coherence when accessing the rhs elements
std::ptrdiff_t l1, l2;
manage_caching_sizes(GetAction, &l1, &l2);
Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
for(Index k2=IsLower ? 0 : size;
IsLower ? k2<size : k2>0;
IsLower ? k2+=kc : k2-=kc)
{
const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);
const Index actual_kc = std::min(IsLower ? size-k2 : k2, kc);
// We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
// and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
// A11 (the triangular part) and A21 the remaining rectangular part.
// Then the high level algorithm is:
// - B = R1 => general block copy (done during the next step)
// - R1 = A11^-1 B => tricky part
// - R1 = L1^-1 B => tricky part
// - update B from the new R1 => actually this has to be performed continuously during the above step
// - R2 -= A21 * B => GEPP
// - R2 = L2 * B => GEPP
// The tricky part: compute R1 = A11^-1 B while updating B from R1
// The idea is to split A11 into multiple small vertical panels.
// Each panel can be split into a small triangular part T1k which is processed without optimization,
// and the remaining small part T2k which is processed using gebp with appropriate block strides
for(Index j2=0; j2<cols; j2+=subcols)
// The tricky part: compute R1 = L1^-1 B while updating B from R1
// The idea is to split L1 into multiple small vertical panels.
// Each panel can be split into a small triangular part A1 which is processed without optimization,
// and the remaining small part A2 which is processed using gebp with appropriate block strides
{
Index actual_cols = (std::min)(cols-j2,subcols);
// for each small vertical panels [T1k^T, T2k^T]^T of lhs
// for each small vertical panels of lhs
for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
@@ -124,11 +114,11 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index rs = actualPanelWidth - k - 1; // remaining size
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
for (Index j=j2; j<j2+actual_cols; ++j)
for (Index j=0; j<cols; ++j)
{
if (TriStorageOrder==RowMajor)
{
Scalar b(0);
Scalar b = 0;
const Scalar* l = &tri(i,s);
Scalar* r = &other(s,j);
for (Index i3=0; i3<k; ++i3)
@@ -153,7 +143,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index blockBOffset = IsLower ? k1 : lengthTarget;
// update the respective rows of B from other
pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset);
pack_rhs(blockB, _other+startBlock, otherStride, actualPanelWidth, cols, actual_kc, blockBOffset);
// GEBP
if (lengthTarget>0)
@@ -162,19 +152,19 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, Scalar(-1),
actualPanelWidth, actual_kc, 0, blockBOffset);
}
}
}
// R2 -= A21 * B => GEPP
// R2 = A2 * B => GEPP
{
Index start = IsLower ? k2+kc : 0;
Index end = IsLower ? size : k2-kc;
for(Index i2=start; i2<end; i2+=mc)
{
const Index actual_mc = (std::min)(mc,end-i2);
const Index actual_mc = std::min(mc,end-i2);
if (actual_mc>0)
{
pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
@@ -184,6 +174,9 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
}
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};
@@ -216,10 +209,10 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
Index nc = rows; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*size;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + sizeW;
conj_if<Conjugate> conj;
@@ -232,7 +225,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
IsLower ? k2>0 : k2<size;
IsLower ? k2-=kc : k2+=kc)
{
const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);
const Index actual_kc = std::min(IsLower ? k2 : size-k2, kc);
Index actual_k2 = IsLower ? k2-actual_kc : k2 ;
Index startPanel = IsLower ? 0 : k2+actual_kc;
@@ -261,7 +254,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
for(Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = (std::min)(mc,rows-i2);
const Index actual_mc = std::min(mc,rows-i2);
// triangular solver kernel
{
@@ -321,6 +314,9 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
-1, -1, 0, 0, allocatedBlockB);
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};

View File

@@ -1,151 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Triangular solver with a matrix right-hand side, based on ?TRSM.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
namespace internal {
// implements LeftSide op(triangular)^-1 * general
//
// EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) specializes
// triangular_solve_matrix<..., OnTheLeft, ..., ColMajor> for scalar EIGTYPE by
// issuing a single MKLPREFIX##trsm call with side 'L' and alpha = 1; _other is
// passed as the non-const matrix argument of that call.
#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
  enum { \
    IsLower = (Mode&Lower) == Lower, \
    IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
    IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
  }; \
  static EIGEN_DONT_INLINE void run( \
      Index size, Index otherSize, \
      const EIGTYPE* _tri, Index triStride, \
      EIGTYPE* _other, Index otherStride) \
  { \
    const bool triIsRowMajor = (TriStorageOrder==RowMajor); \
    \
    /* Problem dimensions and leading dimension of the right-hand side */ \
    MKL_INT m = size, n = otherSize; \
    MKL_INT ldb = otherStride; \
    \
    /* A row-major triangle is passed with a transposing op and the opposite uplo flag */ \
    char side = 'L'; \
    char transa = triIsRowMajor ? (Conjugate ? 'C' : 'T') : 'N'; \
    char uplo = triIsRowMajor ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
    char diag = IsUnitDiag ? 'U' : 'N'; \
    \
    /* alpha = 1 */ \
    MKLTYPE alpha; \
    EIGTYPE one(1); \
    assign_scalar_eig2mkl(alpha, one); \
    \
    /* Triangular factor: used in place unless a conjugated copy is required */ \
    typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
    Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
    MatrixTri conjCopy; \
    const EIGTYPE *a = _tri; \
    MKL_INT lda = triStride; \
    if (conjA) { \
      /* column-major + Conjugate: conjugate explicitly into a temporary */ \
      conjCopy = tri.conjugate(); \
      a = conjCopy.data(); \
      lda = conjCopy.outerStride(); \
    } \
    \
    /* call ?trsm */ \
    MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
  } \
};

EIGEN_MKL_TRSM_L(double, double, d)
EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z)
EIGEN_MKL_TRSM_L(float, float, s)
EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c)
// implements RightSide general * op(triangular)^-1
//
// EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) specializes
// triangular_solve_matrix<..., OnTheRight, ..., ColMajor> for scalar EIGTYPE by
// issuing a single MKLPREFIX##trsm call with side 'R' and alpha = 1; _other is
// passed as the non-const matrix argument of that call.
#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
  enum { \
    IsLower = (Mode&Lower) == Lower, \
    IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
    IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
  }; \
  static EIGEN_DONT_INLINE void run( \
      Index size, Index otherSize, \
      const EIGTYPE* _tri, Index triStride, \
      EIGTYPE* _other, Index otherStride) \
  { \
    const bool triIsRowMajor = (TriStorageOrder==RowMajor); \
    \
    /* Problem dimensions and leading dimension of the general matrix */ \
    MKL_INT m = otherSize, n = size; \
    MKL_INT ldb = otherStride; \
    \
    /* A row-major triangle is passed with a transposing op and the opposite uplo flag */ \
    char side = 'R'; \
    char transa = triIsRowMajor ? (Conjugate ? 'C' : 'T') : 'N'; \
    char uplo = triIsRowMajor ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
    char diag = IsUnitDiag ? 'U' : 'N'; \
    \
    /* alpha = 1 */ \
    MKLTYPE alpha; \
    EIGTYPE one(1); \
    assign_scalar_eig2mkl(alpha, one); \
    \
    /* Triangular factor: used in place unless a conjugated copy is required */ \
    typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
    Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
    MatrixTri conjCopy; \
    const EIGTYPE *a = _tri; \
    MKL_INT lda = triStride; \
    if (conjA) { \
      /* column-major + Conjugate: conjugate explicitly into a temporary */ \
      conjCopy = tri.conjugate(); \
      a = conjCopy.data(); \
      lda = conjCopy.outerStride(); \
    } \
    \
    /* call ?trsm */ \
    MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
  } \
};

EIGEN_MKL_TRSM_R(double, double, d)
EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z)
EIGEN_MKL_TRSM_R(float, float, s)
EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c)
} // end namespace internal
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H

View File

@@ -60,7 +60,7 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
IsLower ? pi<size : pi>0;
IsLower ? pi+=PanelWidth : pi-=PanelWidth)
{
Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
Index actualPanelWidth = std::min(IsLower ? size - pi : pi, PanelWidth);
Index r = IsLower ? pi : size - pi; // remaining size
if (r > 0)
@@ -114,7 +114,7 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
IsLower ? pi<size : pi>0;
IsLower ? pi+=PanelWidth : pi-=PanelWidth)
{
Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
Index actualPanelWidth = std::min(IsLower ? size - pi : pi, PanelWidth);
Index startBlock = IsLower ? pi : pi-actualPanelWidth;
Index endBlock = IsLower ? pi + actualPanelWidth : 0;

View File

@@ -47,7 +47,7 @@ template<
int ResStorageOrder>
struct general_matrix_matrix_product;
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version=Specialized>
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product;
@@ -56,15 +56,11 @@ template<bool Conjugate> struct conj_if;
template<> struct conj_if<true> {
template<typename T>
inline T operator()(const T& x) { return conj(x); }
template<typename T>
inline T pconj(const T& x) { return internal::pconj(x); }
};
template<> struct conj_if<false> {
template<typename T>
inline const T& operator()(const T& x) { return x; }
template<typename T>
inline const T& pconj(const T& x) { return x; }
};
template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
@@ -122,11 +118,11 @@ template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::comp
};
template<typename From,typename To> struct get_factor {
static EIGEN_STRONG_INLINE To run(const From& x) { return x; }
EIGEN_STRONG_INLINE static To run(const From& x) { return x; }
};
template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
EIGEN_STRONG_INLINE static typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
};
// Lightweight helper class to access matrix coefficients.
@@ -179,7 +175,7 @@ template<typename XprType> struct blas_traits
ExtractType,
typename _ExtractType::PlainObject
>::type DirectLinearAccessType;
static inline ExtractType extract(const XprType& x) { return x; }
static inline const ExtractType extract(const XprType& x) { return x; }
static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
};
@@ -196,7 +192,7 @@ struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
IsComplex = NumTraits<Scalar>::IsComplex,
NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
};
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
};
@@ -208,7 +204,7 @@ struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> >
typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); }
};
@@ -221,7 +217,7 @@ struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return - Base::extractScalarFactor(x.nestedExpression()); }
};
@@ -243,7 +239,7 @@ struct blas_traits<Transpose<NestedXpr> >
enum {
IsTransposed = Base::IsTransposed ? 0 : 1
};
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
};
@@ -256,7 +252,7 @@ template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectA
struct extract_data_selector {
static const typename T::Scalar* run(const T& m)
{
return blas_traits<T>::extract(m).data();
return const_cast<typename T::Scalar*>(&blas_traits<T>::extract(m).coeffRef(0,0)); // FIXME this should be .data()
}
};

Some files were not shown because too many files have changed in this diff Show More