Mirror of https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
19 commits: 3.1.0-beta...3.1.0-rc1
4ca5735de4
b9f25ee656
a3e700db72
324ecf153b
9c7b62415a
4e8523b835
88e051019b
cd48254a87
924c7a9300
bc580bbffb
f2849fac20
28d0a8580e
7e36d32b32
5cec86cb1e
512e0b151b
83c932ed15
1e5e66b642
63c6ab3e42
c1edb7fd95
@@ -338,12 +338,9 @@ if(EIGEN_BUILD_BTL)
   add_subdirectory(bench/btl EXCLUDE_FROM_ALL)
 endif(EIGEN_BUILD_BTL)
 
-if(TEST_REAL_CASES)
-  if(NOT WIN32)
-    add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
-    set(ENV(EIGEN_MATRIX_DIR) ${TEST_REAL_CASES})
-  endif(NOT WIN32)
-endif(TEST_REAL_CASES)
+if(NOT WIN32)
+  add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
+endif(NOT WIN32)
 
 ei_testing_print_summary()
@@ -329,12 +329,12 @@ using std::ptrdiff_t;
 #include "src/Core/GeneralProduct.h"
 #include "src/Core/TriangularMatrix.h"
 #include "src/Core/SelfAdjointView.h"
-#include "src/Core/SolveTriangular.h"
-#include "src/Core/products/GeneralBlockPanelKernel.h"
 #include "src/Core/products/Parallelizer.h"
 #include "src/Core/products/CoeffBasedProduct.h"
+#include "src/Core/products/GeneralBlockPanelKernel.h"
 #include "src/Core/products/GeneralMatrixVector.h"
 #include "src/Core/products/GeneralMatrixMatrix.h"
+#include "src/Core/SolveTriangular.h"
 #include "src/Core/products/GeneralMatrixMatrixTriangular.h"
 #include "src/Core/products/SelfadjointMatrixVector.h"
 #include "src/Core/products/SelfadjointMatrixMatrix.h"
@@ -209,10 +209,13 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
 
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
 
+// The vm*powx functions are not available in the Windows version of MKL.
+#ifndef _WIN32
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
+#endif
 
 } // end namespace internal
@@ -295,7 +295,7 @@ struct functor_traits<scalar_opposite_op<Scalar> >
 template<typename Scalar> struct scalar_abs_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return abs(a); }
+  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return internal::abs(a); }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
   { return internal::pabs(a); }

@@ -317,7 +317,7 @@ struct functor_traits<scalar_abs_op<Scalar> >
 template<typename Scalar> struct scalar_abs2_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return abs2(a); }
+  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return internal::abs2(a); }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
   { return internal::pmul(a,a); }

@@ -333,7 +333,7 @@ struct functor_traits<scalar_abs2_op<Scalar> >
   */
 template<typename Scalar> struct scalar_conjugate_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return conj(a); }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return internal::conj(a); }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
 };

@@ -370,7 +370,7 @@ template<typename Scalar>
 struct scalar_real_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return real(a); }
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return internal::real(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_real_op<Scalar> >

@@ -385,7 +385,7 @@ template<typename Scalar>
 struct scalar_imag_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return imag(a); }
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return internal::imag(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_imag_op<Scalar> >
@@ -400,7 +400,7 @@ template<typename Scalar>
 struct scalar_real_ref_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return real_ref(*const_cast<Scalar*>(&a)); }
+  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return internal::real_ref(*const_cast<Scalar*>(&a)); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_real_ref_op<Scalar> >

@@ -415,7 +415,7 @@ template<typename Scalar>
 struct scalar_imag_ref_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return imag_ref(*const_cast<Scalar*>(&a)); }
+  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return internal::imag_ref(*const_cast<Scalar*>(&a)); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_imag_ref_op<Scalar> >

@@ -429,7 +429,7 @@ struct functor_traits<scalar_imag_ref_op<Scalar> >
   */
 template<typename Scalar> struct scalar_exp_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
-  inline const Scalar operator() (const Scalar& a) const { return exp(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::exp(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
 };

@@ -445,7 +445,7 @@ struct functor_traits<scalar_exp_op<Scalar> >
   */
 template<typename Scalar> struct scalar_log_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
-  inline const Scalar operator() (const Scalar& a) const { return log(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::log(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
 };

@@ -703,7 +703,7 @@ struct functor_traits<scalar_add_op<Scalar> >
   */
 template<typename Scalar> struct scalar_sqrt_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
-  inline const Scalar operator() (const Scalar& a) const { return sqrt(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::sqrt(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
 };
@@ -721,7 +721,7 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
   */
 template<typename Scalar> struct scalar_cos_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
-  inline Scalar operator() (const Scalar& a) const { return cos(a); }
+  inline Scalar operator() (const Scalar& a) const { return internal::cos(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
 };

@@ -740,7 +740,7 @@ struct functor_traits<scalar_cos_op<Scalar> >
   */
 template<typename Scalar> struct scalar_sin_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
-  inline const Scalar operator() (const Scalar& a) const { return sin(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::sin(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
 };

@@ -760,7 +760,7 @@ struct functor_traits<scalar_sin_op<Scalar> >
   */
 template<typename Scalar> struct scalar_tan_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
-  inline const Scalar operator() (const Scalar& a) const { return tan(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::tan(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
 };

@@ -779,7 +779,7 @@ struct functor_traits<scalar_tan_op<Scalar> >
   */
 template<typename Scalar> struct scalar_acos_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
-  inline const Scalar operator() (const Scalar& a) const { return acos(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::acos(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
 };

@@ -798,7 +798,7 @@ struct functor_traits<scalar_acos_op<Scalar> >
   */
 template<typename Scalar> struct scalar_asin_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
-  inline const Scalar operator() (const Scalar& a) const { return asin(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::asin(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
 };
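Note on the functor hunks above: they all make the same mechanical change, qualifying the scalar path with internal:: instead of relying on unqualified name lookup, matching the already-qualified packet path (internal::pabs, internal::pexp, ...). A short sketch of where these functors end up in the public API (standard Eigen, values illustrative):

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::ArrayXd a(3);
      a << -1.0, 4.0, -9.0;
      // abs() and sqrt() build coefficient-wise expressions backed by
      // scalar_abs_op / scalar_sqrt_op; vectorizable segments use packetOp.
      std::cout << a.abs().sqrt() << std::endl;  // prints 1, 2, 3
      return 0;
    }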
@@ -100,12 +100,22 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
   typedef typename Rhs::Index Index;
   typedef blas_traits<Lhs> LhsProductTraits;
   typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
 
   static void run(const Lhs& lhs, Rhs& rhs)
   {
     typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
 
     const Index size = lhs.rows();
+    const Index othersize = Side==OnTheLeft? rhs.cols() : rhs.rows();
+
+    typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+              Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType;
+
+    BlockingType blocking(rhs.rows(), rhs.cols(), size);
+
     triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
                             (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
-      ::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
+      ::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride(), blocking);
   }
 };
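The net effect of this hunk: the dynamic-size triangular solver now builds a gemm_blocking_space once (with a KcFactor of 4) and hands it down to triangular_solve_matrix, instead of letting each level recompute cache block sizes. The public entry point is unchanged; a minimal caller that exercises exactly this path (standard Eigen API, sizes illustrative):

    #include <Eigen/Dense>

    int main()
    {
      Eigen::MatrixXd L = Eigen::MatrixXd::Random(200, 200);
      L.diagonal().array() += 20.0;  // keep the system well conditioned
      Eigen::MatrixXd B = Eigen::MatrixXd::Random(200, 8);
      // Dynamic-size path: ends up in the run() shown above, now with blocking.
      L.triangularView<Eigen::Lower>().solveInPlace(B);
      return 0;
    }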
@@ -123,7 +123,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
   _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
 
-  _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f);
+  _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
   _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
 
   _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
@@ -170,7 +170,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
   y = pmadd(y, z, x);
   y = padd(y, p4f_1);
 
-  /* build 2^n */
+  // build 2^n
   emm0 = _mm_cvttps_epi32(fx);
   emm0 = _mm_add_epi32(emm0, p4i_0x7f);
   emm0 = _mm_slli_epi32(emm0, 23);
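For reference, the three intrinsics after that comment construct 2^n purely with integer arithmetic: fx is truncated to an integer n, the single-precision exponent bias 0x7f is added, and the result is shifted into the exponent field. A scalar sketch of the same trick (pow2i is a hypothetical helper; assumes IEEE-754 floats and n in [-126, 127]):

    #include <cstdint>
    #include <cstring>

    float pow2i(int32_t n)
    {
      // (n + 127) placed in bits 30..23 yields a float with value 2^n
      uint32_t bits = uint32_t(n + 0x7f) << 23;
      float r;
      std::memcpy(&r, &bits, sizeof r);  // bit-cast without aliasing violations
      return r;
    }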
@@ -26,7 +26,7 @@
 #define EIGEN_GENERAL_BLOCK_PANEL_H
 
 namespace Eigen {
 
 namespace internal {
 
 template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
@@ -42,9 +42,14 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
 /** \internal */
 inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
 {
-  static std::ptrdiff_t m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
-  static std::ptrdiff_t m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
+  static std::ptrdiff_t m_l1CacheSize = 0;
+  static std::ptrdiff_t m_l2CacheSize = 0;
+  if(m_l2CacheSize==0)
+  {
+    m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
+    m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
+  }
 
   if(action==SetAction)
   {
     // set the cpu cache size and cache all block sizes from a global cache size in byte
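This hunk replaces statics with eagerly-evaluated initializers by zero-initialized statics plus a first-use check, so the CPUID-based cache queries no longer run during static initialization. The pattern in isolation (queryL1 and l1CacheSize are hypothetical stand-ins for Eigen's internal query helpers):

    #include <cstddef>

    std::ptrdiff_t queryL1() { return 32 * 1024; }  // stand-in for a CPUID query

    std::ptrdiff_t l1CacheSize()
    {
      static std::ptrdiff_t l1 = 0;   // cheap zero-initialization
      if (l1 == 0)
        l1 = queryL1();               // expensive query runs once, on first use
      return l1;
    }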
@@ -79,7 +79,7 @@ static void run(Index rows, Index cols, Index depth,
 
   typedef gebp_traits<LhsScalar,RhsScalar> Traits;
 
-  Index kc = blocking.kc();                 // cache block size along the K direction
+  Index kc = blocking.kc();                   // cache block size along the K direction
   Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
   //Index nc = blocking.nc(); // cache block size along the N direction
@@ -249,7 +249,7 @@ struct gemm_functor
 
   BlockingType& m_blocking;
 };
 
-template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth,
+template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
 bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
 
 template<typename _LhsScalar, typename _RhsScalar>
@@ -282,8 +282,8 @@ class level3_blocking
     inline RhsScalar* blockW() { return m_blockW; }
 };
 
-template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
-class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true>
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true>
   : public level3_blocking<
       typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
       typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -324,8 +324,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
     inline void allocateAll() {}
 };
 
-template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
-class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false>
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
   : public level3_blocking<
       typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -349,7 +349,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
       this->m_nc = Transpose ? rows : cols;
       this->m_kc = depth;
 
-      computeProductBlockingSizes<LhsScalar,RhsScalar>(this->m_kc, this->m_mc, this->m_nc);
+      computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc);
       m_sizeA = this->m_mc * this->m_kc;
       m_sizeB = this->m_kc * this->m_nc;
       m_sizeW = this->m_kc*Traits::WorkSpaceFactor;
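The new KcFactor parameter lets a caller shrink the k-direction block relative to what a plain GEMM would use; the triangular solver instantiates its BlockingType with KcFactor = 4 (see the gemm_blocking_space<...,4> typedef earlier in this diff). A sketch of the intent only — chooseKc is hypothetical, and the real sizing logic lives in computeProductBlockingSizes:

    #include <algorithm>
    #include <cstddef>

    // Pick kc so that kcFactor simultaneous kc-deep panels still fit in l2Bytes.
    std::ptrdiff_t chooseKc(std::ptrdiff_t depth, std::ptrdiff_t l2Bytes,
                            std::ptrdiff_t bytesPerRow, int kcFactor)
    {
      std::ptrdiff_t budget = l2Bytes / (kcFactor * bytesPerRow);
      return std::min(depth, std::max<std::ptrdiff_t>(budget, 1));
    }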
@@ -57,12 +57,23 @@ inline void manage_multi_threading(Action action, int* v)
   }
 }
 
+}
+
+/** Must be called first when calling Eigen from multiple threads */
+inline void initParallel()
+{
+  int nbt;
+  internal::manage_multi_threading(GetAction, &nbt);
+  std::ptrdiff_t l1, l2;
+  internal::manage_caching_sizes(GetAction, &l1, &l2);
+}
+
 /** \returns the max number of threads reserved for Eigen
   * \sa setNbThreads */
 inline int nbThreads()
 {
   int ret;
-  manage_multi_threading(GetAction, &ret);
+  internal::manage_multi_threading(GetAction, &ret);
   return ret;
 }
@@ -70,9 +81,11 @@ inline int nbThreads()
   * \sa nbThreads */
 inline void setNbThreads(int v)
 {
-  manage_multi_threading(SetAction, &v);
+  internal::manage_multi_threading(SetAction, &v);
 }
 
+namespace internal {
+
 template<typename Index> struct GemmParallelInfo
 {
   GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
@@ -121,6 +134,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
   if(threads==1)
     return func(0,rows, 0,cols);
 
+  Eigen::initParallel();
   func.initParallelSession();
 
   if(transpose)
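Taken together, the Parallelizer hunks move the thread-count helpers out of namespace internal (hence the explicit internal:: qualification), add Eigen::initParallel(), and call it from parallelize_gemm so the cache-size and thread-count statics are initialized before any worker thread touches them. The public API in use (OpenMP build assumed, sizes illustrative):

    #include <Eigen/Dense>

    int main()
    {
      Eigen::initParallel();       // safe to call up front from the main thread
      Eigen::setNbThreads(4);      // forwards to internal::manage_multi_threading
      int n = Eigen::nbThreads();
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(512, 512);
      Eigen::MatrixXd C = A * A;   // a large GEMM may be split across n threads
      return (n >= 1 && C.rows() == 512) ? 0 : 1;
    }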
@@ -36,14 +36,15 @@ struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
   static EIGEN_DONT_INLINE void run(
     Index size, Index cols,
     const Scalar* tri, Index triStride,
-    Scalar* _other, Index otherStride)
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
   {
     triangular_solve_matrix<
       Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
       (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
       NumTraits<Scalar>::IsComplex && Conjugate,
       TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor>
-      ::run(size, cols, tri, triStride, _other, otherStride);
+      ::run(size, cols, tri, triStride, _other, otherStride, blocking);
   }
 };
@@ -55,7 +56,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
   static EIGEN_DONT_INLINE void run(
     Index size, Index otherSize,
     const Scalar* _tri, Index triStride,
-    Scalar* _other, Index otherStride)
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
   {
     Index cols = otherSize;
     const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride);
@@ -67,17 +69,16 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
       IsLower = (Mode&Lower) == Lower
     };
 
-    Index kc = size; // cache block size along the K direction
-    Index mc = size; // cache block size along the M direction
-    Index nc = cols; // cache block size along the N direction
-    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(size,blocking.mc());  // cache block size along the M direction
 
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*cols;
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    std::size_t sizeB = sizeW + kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
-    Scalar* blockB = allocatedBlockB + sizeW;
-    Scalar* blockW = allocatedBlockB;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
 
     conj_if<Conjugate> conj;
     gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
@@ -181,7 +182,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
         {
           pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
 
-          gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1));
+          gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW);
         }
       }
     }
@@ -197,7 +198,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
   static EIGEN_DONT_INLINE void run(
     Index size, Index otherSize,
     const Scalar* _tri, Index triStride,
-    Scalar* _other, Index otherStride)
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
   {
     Index rows = otherSize;
     const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride);
@@ -210,19 +212,16 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
       IsLower = (Mode&Lower) == Lower
     };
 
-//  Index kc = std::min<Index>(Traits::Max_kc/4,size); // cache block size along the K direction
-//  Index mc = std::min<Index>(Traits::Max_mc,size);   // cache block size along the M direction
-    // check that !!!!
-    Index kc = size; // cache block size along the K direction
-    Index mc = size; // cache block size along the M direction
-    Index nc = rows; // cache block size along the N direction
-    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
 
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*size;
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    std::size_t sizeB = sizeW + kc*size;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
-    Scalar* blockB = allocatedBlockB + sizeW;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
 
     conj_if<Conjugate> conj;
     gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
@@ -289,7 +288,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
                       Scalar(-1),
                       actual_kc, actual_kc, // strides
                       panelOffset, panelOffset, // offsets
-                      allocatedBlockB); // workspace
+                      blockW); // workspace
         }
 
         // unblocked triangular solve
@@ -320,7 +319,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
           if (rs>0)
             gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb,
                         actual_mc, actual_kc, rs, Scalar(-1),
-                        -1, -1, 0, 0, allocatedBlockB);
+                        -1, -1, 0, 0, blockW);
         }
       }
     }
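Both triangular_solve_matrix paths now obtain their buffers through ei_declare_aligned_stack_constructed_variable, which adopts a caller-provided pointer from the blocking object when one is available and otherwise allocates aligned scratch released at scope exit. A hypothetical minimal use of that internal macro, shown only to illustrate the fallback behavior (scaleBuffer is an invented name):

    #include <Eigen/Core>
    #include <cstddef>

    void scaleBuffer(double* preallocated, std::size_t n)
    {
      // 'work' aliases 'preallocated' if it is non-null; otherwise aligned
      // stack/heap memory is obtained and automatically freed at scope exit.
      ei_declare_aligned_stack_constructed_variable(double, work, n, preallocated);
      for (std::size_t i = 0; i < n; ++i)
        work[i] = 2.0 * double(i);
    }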
@@ -28,7 +28,7 @@
 
 #define EIGEN_WORLD_VERSION 3
 #define EIGEN_MAJOR_VERSION 0
-#define EIGEN_MINOR_VERSION 93
+#define EIGEN_MINOR_VERSION 94
 
 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
                                       (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
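3.0.94 is the internal version number of 3.1.0-rc1 (the beta carried 3.0.93). Client code can branch on it with the macro defined right below the bump:

    #include <Eigen/Core>

    #if EIGEN_VERSION_AT_LEAST(3,0,94)
    // 3.1.0-rc1 or newer code paths
    #endif

    int main() { return 0; }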
@@ -468,15 +468,40 @@ public:
   {
     return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other);
   }
 
+#ifdef __INTEL_COMPILER
+private:
+  // this intermediate structure permits to workaround a bug in ICC 11:
+  //   error: template instantiation resulted in unexpected function type of "Eigen::Transform<double, 3, 32, 0>
+  //             (const Eigen::Transform<double, 3, 2, 0> &) const"
+  //    (the meaning of a name may have changed since the template declaration -- the type of the template is:
+  //     "Eigen::internal::transform_transform_product_impl<Eigen::Transform<double, 3, 32, 0>,
+  //      Eigen::Transform<double, 3, Mode, Options>, <expression>>::ResultType (const Eigen::Transform<double, 3, Mode, Options> &) const")
+  //
+  template<int OtherMode,int OtherOptions> struct icc_11_workaround
+  {
+    typedef internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> > ProductType;
+    typedef typename ProductType::ResultType ResultType;
+  };
+
+public:
   /** Concatenates two different transformations */
   template<int OtherMode,int OtherOptions>
-  inline const typename internal::transform_transform_product_impl<
-    Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
+  inline typename icc_11_workaround<OtherMode,OtherOptions>::ResultType
   operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
   {
-    return internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::run(*this,other);
+    typedef typename icc_11_workaround<OtherMode,OtherOptions>::ProductType ProductType;
+    return ProductType::run(*this,other);
   }
+#else
+  /** Concatenates two different transformations */
+  template<int OtherMode,int OtherOptions>
+  inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
+  operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
+  {
+    return internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::run(*this,other);
+  }
+#endif
 
   /** \sa MatrixBase::setIdentity() */
   void setIdentity() { m_matrix.setIdentity(); }
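The operator guarded here is the mixed-mode concatenation; same-mode products use the overload kept as context at the top of the hunk. A usage sketch that instantiates exactly this template (standard Geometry API):

    #include <Eigen/Geometry>

    int main()
    {
      Eigen::Isometry3d iso = Eigen::Isometry3d::Identity();
      Eigen::Affine3d aff = Eigen::Affine3d::Identity();
      aff.scale(2.0);
      // Modes differ (Isometry vs Affine): transform_transform_product_impl
      // selects the result type -- Affine3d here.
      Eigen::Affine3d res = iso * aff;
      return res.matrix().isApprox(aff.matrix()) ? 0 : 1;
    }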
@@ -35,7 +35,6 @@ namespace Eigen {
   *
   * \sa TutorialSparseDirectSolvers
   */
-
 template<typename _MatrixType, bool IsStrSym = false> class PastixLU;
 template<typename _MatrixType, int Options> class PastixLLT;
 template<typename _MatrixType, int Options> class PastixLDLT;
@@ -75,32 +74,34 @@ namespace internal
   void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
   {
     if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
-    if (nbrhs == 0) x = NULL;
+    if (nbrhs == 0) {x = NULL; nbrhs=1;}
     s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
   }
 
   void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
   {
     if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
-    if (nbrhs == 0) x = NULL;
+    if (nbrhs == 0) {x = NULL; nbrhs=1;}
     d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
   }
 
   void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
   {
     if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
-    if (nbrhs == 0) x = NULL;
+    if (nbrhs == 0) {x = NULL; nbrhs=1;}
     c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<COMPLEX*>(vals), perm, invp, reinterpret_cast<COMPLEX*>(x), nbrhs, iparm, dparm);
   }
 
   void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
   {
     if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
-    if (nbrhs == 0) x = NULL;
+    if (nbrhs == 0) {x = NULL; nbrhs=1;}
     z_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<DCOMPLEX*>(vals), perm, invp, reinterpret_cast<DCOMPLEX*>(x), nbrhs, iparm, dparm);
   }
 
   // Convert the matrix to Fortran-style Numbering
   template <typename MatrixType>
-  void EigenToFortranNumbering (MatrixType& mat)
+  void c_to_fortran_numbering (MatrixType& mat)
   {
     if ( !(mat.outerIndexPtr()[0]) )
     {
@@ -114,7 +115,7 @@ namespace internal
 
   // Convert to C-style Numbering
   template <typename MatrixType>
-  void EigenToCNumbering (MatrixType& mat)
+  void fortran_to_c_numbering (MatrixType& mat)
   {
     // Check the Numbering
     if ( mat.outerIndexPtr()[0] == 1 )
@@ -126,38 +127,12 @@ namespace internal
         --mat.innerIndexPtr()[i];
     }
   }
 
-  // Symmetrize the graph of the input matrix
-  // In : The Input matrix to symmetrize the pattern
-  // Out : The output matrix
-  // StrMatTrans : The structural pattern of the transpose of In; It is
-  // used to optimize the future symmetrization with the same matrix pattern
-  // WARNING It is assumed here that successive calls to this routine are done
-  // with matrices having the same pattern.
-  template <typename MatrixType>
-  void EigenSymmetrizeMatrixGraph (const MatrixType& In, MatrixType& Out, MatrixType& StrMatTrans, bool& hasTranspose)
-  {
-    eigen_assert(In.cols()==In.rows() && " Can only symmetrize the graph of a square matrix");
-    if (!hasTranspose)
-    { //First call to this routine, need to compute the structural pattern of In^T
-      StrMatTrans = In.transpose();
-      // Set the elements of the matrix to zero
-      for (int i = 0; i < StrMatTrans.rows(); i++)
-      {
-        for (typename MatrixType::InnerIterator it(StrMatTrans, i); it; ++it)
-          it.valueRef() = 0.0;
-      }
-      hasTranspose = true;
-    }
-    Out = (StrMatTrans + In).eval();
-  }
-
 }
 
 // This is the base class to interface with PaStiX functions.
 // Users should not use this class directly.
 template <class Derived>
-class PastixBase
+class PastixBase : internal::noncopyable
 {
   public:
     typedef typename internal::pastix_traits<Derived>::MatrixType _MatrixType;
@@ -166,29 +141,19 @@ class PastixBase
     typedef typename MatrixType::RealScalar RealScalar;
     typedef typename MatrixType::Index Index;
     typedef Matrix<Scalar,Dynamic,1> Vector;
+    typedef SparseMatrix<Scalar, ColMajor> ColSpMatrix;
 
   public:
 
-    PastixBase():m_initisOk(false),m_analysisIsOk(false),m_factorizationIsOk(false),m_isInitialized(false)
+    PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false), m_pastixdata(0), m_size(0)
     {
-      m_pastixdata = 0;
-      m_hasTranspose = false;
-      PastixInit();
+      init();
     }
 
     ~PastixBase()
     {
-      PastixDestroy();
+      clean();
     }
 
-    // Initialize the Pastix data structure, check the matrix
-    void PastixInit();
-
-    // Compute the ordering and the symbolic factorization
-    Derived& analyzePattern (MatrixType& mat);
-
-    // Compute the numerical factorization
-    Derived& factorize (MatrixType& mat);
-
     /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
       *
@@ -269,7 +234,6 @@ class PastixBase
     /** Return a reference to a particular index parameter of the DPARM vector
       * \sa dparm()
       */
-
     double& dparm(int idxparam)
     {
       return m_dparm(idxparam);
@@ -307,17 +271,27 @@ class PastixBase
     }
 
   protected:
+
+    // Initialize the Pastix data structure, check the matrix
+    void init();
+
+    // Compute the ordering and the symbolic factorization
+    void analyzePattern(ColSpMatrix& mat);
+
+    // Compute the numerical factorization
+    void factorize(ColSpMatrix& mat);
+
     // Free all the data allocated by Pastix
-    void PastixDestroy()
+    void clean()
     {
       eigen_assert(m_initisOk && "The Pastix structure should be allocated first");
       m_iparm(IPARM_START_TASK) = API_TASK_CLEAN;
       m_iparm(IPARM_END_TASK) = API_TASK_CLEAN;
-      internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, m_mat_null.outerIndexPtr(), m_mat_null.innerIndexPtr(),
-                             m_mat_null.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 1, m_iparm.data(), m_dparm.data());
+      internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, 0, 0, (Scalar*)0,
+                             m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());
     }
 
-    Derived& compute (MatrixType& mat);
+    void compute(ColSpMatrix& mat);
@@ -325,22 +299,12 @@ class PastixBase
     bool m_isInitialized;
     mutable ComputationInfo m_info;
     mutable pastix_data_t *m_pastixdata; // Data structure for pastix
-    mutable SparseMatrix<Scalar, ColMajor> m_mat_null; // An input null matrix
-    mutable Matrix<Scalar, Dynamic,1> m_vec_null; // An input null vector
-    mutable SparseMatrix<Scalar, ColMajor> m_StrMatTrans; // The transpose pattern of the input matrix
-    mutable bool m_hasTranspose; // The transpose of the current matrix has already been computed
     mutable int m_comm; // The MPI communicator identifier
-    mutable Matrix<Index,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
+    mutable Matrix<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
     mutable Matrix<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters
     mutable Matrix<Index,Dynamic,1> m_perm;  // Permutation vector
     mutable Matrix<Index,Dynamic,1> m_invp;  // Inverse permutation vector
-    mutable int m_ordering; // ordering method to use
-    mutable int m_amalgamation; // level of amalgamation
     mutable int m_size; // Size of the matrix
 
-  private:
-    PastixBase(PastixBase& ) {}
 };
 
 /** Initialize the PaStiX data structure.
@@ -348,29 +312,29 @@ class PastixBase
  * \sa iparm() dparm()
  */
template <class Derived>
-void PastixBase<Derived>::PastixInit()
+void PastixBase<Derived>::init()
{
  m_size = 0;
-  m_iparm.resize(IPARM_SIZE);
-  m_dparm.resize(DPARM_SIZE);
+  m_iparm.setZero(IPARM_SIZE);
+  m_dparm.setZero(DPARM_SIZE);
 
  m_iparm(IPARM_MODIFY_PARAMETER) = API_NO;
-  if(m_pastixdata)
-  { // This trick is used to reset the Pastix internal data between successive
-    // calls with (structural) different matrices
-    PastixDestroy();
-    m_pastixdata = 0;
-    m_iparm(IPARM_MODIFY_PARAMETER) = API_YES;
-    m_hasTranspose = false;
-  }
  pastix(&m_pastixdata, MPI_COMM_WORLD,
         0, 0, 0, 0,
         0, 0, 0, 1, m_iparm.data(), m_dparm.data());
 
-  m_iparm[IPARM_MATRIX_VERIFICATION] = API_NO;
-  m_iparm[IPARM_VERBOSE] = 2;
-  m_iparm[IPARM_ORDERING] = API_ORDER_SCOTCH;
-  m_iparm[IPARM_INCOMPLETE] = API_NO;
-  m_iparm[IPARM_OOC_LIMIT] = 2000;
-  m_iparm[IPARM_RHS_MAKING] = API_RHS_B;
+  m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
 
  m_iparm(IPARM_START_TASK) = API_TASK_INIT;
  m_iparm(IPARM_END_TASK) = API_TASK_INIT;
-  m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
-  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, m_mat_null.outerIndexPtr(), m_mat_null.innerIndexPtr(),
-                         m_mat_null.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 1, m_iparm.data(), m_dparm.data());
+  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, 0, 0, (Scalar*)0,
+                         0, 0, 0, 0, m_iparm.data(), m_dparm.data());
 
  // Check the returned error
  if(m_iparm(IPARM_ERROR_NUMBER)) {
@@ -384,82 +348,74 @@ void PastixBase<Derived>::PastixInit()
}

template <class Derived>
-Derived& PastixBase<Derived>::compute(MatrixType& mat)
+void PastixBase<Derived>::compute(ColSpMatrix& mat)
{
  eigen_assert(mat.rows() == mat.cols() && "The input matrix should be squared");
-  typedef typename MatrixType::Scalar Scalar;
-
-  // Save the size of the current matrix
-  m_size = mat.rows();
-  // Convert the matrix in fortran-style numbering
-  internal::EigenToFortranNumbering(mat);
-  analyzePattern(mat);
+  analyzePattern(mat);
  factorize(mat);
 
  m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
-  if (m_factorizationIsOk) m_isInitialized = true;
-
-  //Convert back the matrix -- Is it really necessary here
-  internal::EigenToCNumbering(mat);
-
-  return derived();
+  m_isInitialized = m_factorizationIsOk;
}

template <class Derived>
-Derived& PastixBase<Derived>::analyzePattern(MatrixType& mat)
-{
-  eigen_assert(m_initisOk && "PastixInit should be called first to set the default parameters");
+void PastixBase<Derived>::analyzePattern(ColSpMatrix& mat)
+{
+  eigen_assert(m_initisOk && "The initialization of PaSTiX failed");
+
+  // clean previous calls
+  if(m_size>0)
+    clean();
+
  m_size = mat.rows();
+  m_perm.resize(m_size);
+  m_invp.resize(m_size);
 
-  // Convert the matrix in fortran-style numbering
-  internal::EigenToFortranNumbering(mat);
-
  m_iparm(IPARM_START_TASK) = API_TASK_ORDERING;
  m_iparm(IPARM_END_TASK) = API_TASK_ANALYSE;
 
  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, m_size, mat.outerIndexPtr(), mat.innerIndexPtr(),
-                         mat.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 0, m_iparm.data(), m_dparm.data());
+                         mat.valuePtr(), m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());
 
  // Check the returned error
-  if(m_iparm(IPARM_ERROR_NUMBER)) {
+  if(m_iparm(IPARM_ERROR_NUMBER))
+  {
    m_info = NumericalIssue;
    m_analysisIsOk = false;
  }
-  else {
+  else
+  {
    m_info = Success;
    m_analysisIsOk = true;
  }
-  return derived();
}

template <class Derived>
-Derived& PastixBase<Derived>::factorize(MatrixType& mat)
+void PastixBase<Derived>::factorize(ColSpMatrix& mat)
{
-  // if(&m_cpyMat != &mat) m_cpyMat = mat;
  eigen_assert(m_analysisIsOk && "The analysis phase should be called before the factorization phase");
  m_iparm(IPARM_START_TASK) = API_TASK_NUMFACT;
  m_iparm(IPARM_END_TASK) = API_TASK_NUMFACT;
  m_size = mat.rows();
 
-  // Convert the matrix in fortran-style numbering
-  internal::EigenToFortranNumbering(mat);
-
  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, m_size, mat.outerIndexPtr(), mat.innerIndexPtr(),
-                         mat.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 0, m_iparm.data(), m_dparm.data());
+                         mat.valuePtr(), m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());
 
  // Check the returned error
-  if(m_iparm(IPARM_ERROR_NUMBER)) {
+  if(m_iparm(IPARM_ERROR_NUMBER))
+  {
    m_info = NumericalIssue;
    m_factorizationIsOk = false;
    m_isInitialized = false;
  }
-  else {
+  else
+  {
    m_info = Success;
    m_factorizationIsOk = true;
    m_isInitialized = true;
  }
-  return derived();
}

/* Solve the system */
@@ -475,20 +431,17 @@ bool PastixBase<Base>::_solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const
  x = b; /* on return, x is overwritten by the computed solution */

  for (int i = 0; i < b.cols(); i++){
-    m_iparm(IPARM_START_TASK) = API_TASK_SOLVE;
-    m_iparm(IPARM_END_TASK) = API_TASK_REFINE;
-    m_iparm(IPARM_RHS_MAKING) = API_RHS_B;
-    internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, x.rows(), m_mat_null.outerIndexPtr(), m_mat_null.innerIndexPtr(),
-                           m_mat_null.valuePtr(), m_perm.data(), m_invp.data(), &x(0, i), rhs, m_iparm.data(), m_dparm.data());
+    m_iparm[IPARM_START_TASK] = API_TASK_SOLVE;
+    m_iparm[IPARM_END_TASK] = API_TASK_REFINE;
+
+    internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, x.rows(), 0, 0, 0,
+                           m_perm.data(), m_invp.data(), &x(0, i), rhs, m_iparm.data(), m_dparm.data());
  }

  // Check the returned error
-  if(m_iparm(IPARM_ERROR_NUMBER)) {
-    m_info = NumericalIssue;
-    return false;
-  }
-  else {
-    return true;
-  }
+  m_info = m_iparm(IPARM_ERROR_NUMBER)==0 ? Success : NumericalIssue;
+
+  return m_iparm(IPARM_ERROR_NUMBER)==0;
}

/** \ingroup PaStiXSupport_Module
@@ -516,14 +469,18 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
  public:
    typedef _MatrixType MatrixType;
    typedef PastixBase<PastixLU<MatrixType> > Base;
    typedef typename MatrixType::Scalar Scalar;
-    typedef SparseMatrix<Scalar, ColMajor> PaStiXType;
+    typedef typename Base::ColSpMatrix ColSpMatrix;
    typedef typename MatrixType::Index Index;

  public:
-    PastixLU():Base() {}
+    PastixLU() : Base()
+    {
+      init();
+    }

    PastixLU(const MatrixType& matrix):Base()
    {
+      init();
      compute(matrix);
    }
    /** Compute the LU supernodal factorization of \p matrix.
@@ -533,18 +490,9 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
      */
    void compute (const MatrixType& matrix)
    {
-      // Pastix supports only column-major matrices with a symmetric pattern
-      Base::PastixInit();
-      PaStiXType temp(matrix.rows(), matrix.cols());
-      // Symmetrize the graph of the matrix
-      if (IsStrSym)
-        temp = matrix;
-      else
-      {
-        internal::EigenSymmetrizeMatrixGraph<PaStiXType>(matrix, temp, m_StrMatTrans, m_hasTranspose);
-      }
-      m_iparm[IPARM_SYM] = API_SYM_NO;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
+      m_structureIsUptodate = false;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::compute(temp);
    }
    /** Compute the LU symbolic factorization of \p matrix using its sparsity pattern.
@@ -554,20 +502,9 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
      */
    void analyzePattern(const MatrixType& matrix)
    {
-
-      Base::PastixInit();
-      /* Pastix supports only column-major matrices with symmetrized patterns */
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      // Symmetrize the graph of the matrix
-      if (IsStrSym)
-        temp = matrix;
-      else
-      {
-        internal::EigenSymmetrizeMatrixGraph<PaStiXType>(matrix, temp, m_StrMatTrans,m_hasTranspose);
-      }
-
-      m_iparm(IPARM_SYM) = API_SYM_NO;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
+      m_structureIsUptodate = false;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::analyzePattern(temp);
    }
@@ -578,27 +515,48 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
      */
    void factorize(const MatrixType& matrix)
    {
-      /* Pastix supports only column-major matrices with symmetrized patterns */
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      // Symmetrize the graph of the matrix
-      if (IsStrSym)
-        temp = matrix;
-      else
-      {
-        internal::EigenSymmetrizeMatrixGraph<PaStiXType>(matrix, temp, m_StrMatTrans,m_hasTranspose);
-      }
-      m_iparm(IPARM_SYM) = API_SYM_NO;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::factorize(temp);
    }
  protected:

+    void init()
+    {
+      m_structureIsUptodate = false;
+      m_iparm(IPARM_SYM) = API_SYM_NO;
+      m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
+    }
+
+    void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
+    {
+      if(IsStrSym)
+        out = matrix;
+      else
+      {
+        if(!m_structureIsUptodate)
+        {
+          // update the transposed structure
+          m_transposedStructure = matrix.transpose();
+
+          // Set the elements of the matrix to zero
+          for (Index j=0; j<m_transposedStructure.outerSize(); ++j)
+            for(typename ColSpMatrix::InnerIterator it(m_transposedStructure, j); it; ++it)
+              it.valueRef() = 0.0;
+
+          m_structureIsUptodate = true;
+        }
+
+        out = m_transposedStructure + matrix;
+      }
+      internal::c_to_fortran_numbering(out);
+    }
+
    using Base::m_iparm;
    using Base::m_dparm;
-    using Base::m_StrMatTrans;
-    using Base::m_hasTranspose;

-  private:
-    PastixLU(PastixLU& ) {}
+    ColSpMatrix m_transposedStructure;
+    bool m_structureIsUptodate;
};
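After this refactor each solver owns a small init() for its IPARM defaults and a grabMatrix() that produces the column-major, structurally symmetric, Fortran-numbered copy PaStiX expects; the per-call PastixInit()/symmetrize boilerplate is gone. A hedged usage sketch (requires a PaStiX installation; the tridiagonal fill and sizes are illustrative):

    #include <Eigen/Sparse>
    #include <Eigen/PaStiXSupport>

    int main()
    {
      const int n = 100;
      Eigen::SparseMatrix<double> A(n, n);
      for (int i = 0; i < n; ++i) {
        A.insert(i, i) = 2.0;
        if (i > 0)     A.insert(i, i - 1) = -1.0;
        if (i + 1 < n) A.insert(i, i + 1) = -1.0;
      }
      A.makeCompressed();
      Eigen::VectorXd b = Eigen::VectorXd::Ones(n);

      Eigen::PastixLU<Eigen::SparseMatrix<double> > lu;
      lu.compute(A);                       // grabMatrix + Base::compute
      if (lu.info() != Eigen::Success) return 1;
      Eigen::VectorXd x = lu.solve(b);
      return 0;
    }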
|
/** \ingroup PaStiXSupport_Module
@@ -621,15 +579,18 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
  public:
    typedef _MatrixType MatrixType;
    typedef PastixBase<PastixLLT<MatrixType, _UpLo> > Base;
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::Index Index;
+    typedef typename Base::ColSpMatrix ColSpMatrix;

  public:
    enum { UpLo = _UpLo };
-    PastixLLT():Base() {}
+    PastixLLT() : Base()
+    {
+      init();
+    }

    PastixLLT(const MatrixType& matrix):Base()
    {
+      init();
      compute(matrix);
    }
@@ -638,13 +599,8 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
      */
    void compute (const MatrixType& matrix)
    {
-      // Pastix supports only lower, column-major matrices
-      Base::PastixInit(); // This is necessary to let PaStiX initialize its data structure between successive calls to compute
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-      temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-      m_iparm(IPARM_SYM) = API_SYM_YES;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::compute(temp);
    }
@@ -654,13 +610,8 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
    void analyzePattern(const MatrixType& matrix)
    {
-      Base::PastixInit();
-      // Pastix supports only lower, column-major matrices
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-      temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-      m_iparm(IPARM_SYM) = API_SYM_YES;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::analyzePattern(temp);
    }
    /** Compute the LL^T supernodal numerical factorization of \p matrix
@@ -668,19 +619,25 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
      */
    void factorize(const MatrixType& matrix)
    {
-      // Pastix supports only lower, column-major matrices
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-      temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-      m_iparm(IPARM_SYM) = API_SYM_YES;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::factorize(temp);
    }
  protected:
    using Base::m_iparm;

-  private:
-    PastixLLT(PastixLLT& ) {}
+    void init()
+    {
+      m_iparm(IPARM_SYM) = API_SYM_YES;
+      m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
+    }
+
+    void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
+    {
+      // Pastix supports only lower, column-major matrices
+      out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
+      internal::c_to_fortran_numbering(out);
+    }
};

/** \ingroup PaStiXSupport_Module
@@ -700,18 +657,21 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
template<typename _MatrixType, int _UpLo>
class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >
{
-  public:
+public:
  typedef _MatrixType MatrixType;
  typedef PastixBase<PastixLDLT<MatrixType, _UpLo> > Base;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::Index Index;
+  typedef typename Base::ColSpMatrix ColSpMatrix;

public:
  enum { UpLo = _UpLo };
-  PastixLDLT():Base() {}
+  PastixLDLT():Base()
+  {
+    init();
+  }

  PastixLDLT(const MatrixType& matrix):Base()
  {
+    init();
    compute(matrix);
  }
@@ -720,13 +680,8 @@ public:
    */
  void compute (const MatrixType& matrix)
  {
-    Base::PastixInit();
-    // Pastix supports only lower, column-major matrices
-    SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-    PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-    temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-    m_iparm(IPARM_SYM) = API_SYM_YES;
-    m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
+    ColSpMatrix temp;
+    grabMatrix(matrix, temp);
    Base::compute(temp);
  }
@@ -736,14 +691,8 @@ public:
    */
  void analyzePattern(const MatrixType& matrix)
  {
-    Base::PastixInit();
-    // Pastix supports only lower, column-major matrices
-    SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-    PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-    temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-
-    m_iparm(IPARM_SYM) = API_SYM_YES;
-    m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
+    ColSpMatrix temp;
+    grabMatrix(matrix, temp);
    Base::analyzePattern(temp);
  }
  /** Compute the LDL^T supernodal numerical factorization of \p matrix
@@ -751,21 +700,26 @@ public:
    */
  void factorize(const MatrixType& matrix)
  {
-    // Pastix supports only lower, column-major matrices
-    SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-    PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-    temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-
-    m_iparm(IPARM_SYM) = API_SYM_YES;
-    m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
+    ColSpMatrix temp;
+    grabMatrix(matrix, temp);
    Base::factorize(temp);
  }

protected:
  using Base::m_iparm;

-private:
-  PastixLDLT(PastixLDLT& ) {}
+  void init()
+  {
+    m_iparm(IPARM_SYM) = API_SYM_YES;
+    m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
+  }
+
+  void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
+  {
+    // Pastix supports only lower, column-major matrices
+    out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
+    internal::c_to_fortran_numbering(out);
+  }
};

namespace internal {
@@ -76,6 +76,9 @@ enum SimplicialCholeskyMode {
  * These classes provide LL^T and LDL^T Cholesky factorizations of sparse matrices that are
  * selfadjoint and positive definite. The factorization allows for solving A.X = B where
  * X and B can be either dense or sparse.
+  *
+  * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
+  * such that the factorized matrix is P A P^-1.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
@@ -208,7 +211,7 @@ class SimplicialCholeskyBase : internal::noncopyable
        return;
 
      if(m_P.size()>0)
-        dest = m_Pinv * b;
+        dest = m_P * b;
      else
        dest = b;
@@ -222,7 +225,7 @@ class SimplicialCholeskyBase : internal::noncopyable
        derived().matrixU().solveInPlace(dest);
 
      if(m_P.size()>0)
-        dest = m_P * dest;
+        dest = m_Pinv * dest;
    }
 
    /** \internal */
@@ -268,7 +271,7 @@ class SimplicialCholeskyBase : internal::noncopyable
      eigen_assert(a.rows()==a.cols());
      int size = a.cols();
      CholMatrixType ap(size,size);
-      ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_Pinv);
+      ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
      factorize_preordered<DoLDLT>(ap);
    }
@@ -358,6 +361,9 @@ template<typename _MatrixType, int _UpLo> struct traits<SimplicialCholesky<_MatrixType,_UpLo> >
  * This class provides a LL^T Cholesky factorizations of sparse matrices that are
  * selfadjoint and positive definite. The factorization allows for solving A.X = B where
  * X and B can be either dense or sparse.
+  *
+  * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
+  * such that the factorized matrix is P A P^-1.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
@@ -443,6 +449,9 @@ public:
  * This class provides a LDL^T Cholesky factorizations without square root of sparse matrices that are
  * selfadjoint and positive definite. The factorization allows for solving A.X = B where
  * X and B can be either dense or sparse.
+  *
+  * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
+  * such that the factorized matrix is P A P^-1.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
@@ -628,7 +637,7 @@ public:
        return;
 
      if(Base::m_P.size()>0)
-        dest = Base::m_Pinv * b;
+        dest = Base::m_P * b;
      else
        dest = b;
@@ -652,7 +661,7 @@ public:
      }
 
      if(Base::m_P.size()>0)
-        dest = Base::m_P * dest;
+        dest = Base::m_Pinv * dest;
    }
 
    Scalar determinant() const
@@ -678,22 +687,23 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixType& ap)
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();
  // TODO allows to configure the permutation
  // Note that amd compute the inverse permutation
  {
    CholMatrixType C;
    C = a.template selfadjointView<UpLo>();
    // remove diagonal entries:
    // seems not to be needed
    // C.prune(keep_diag());
-    internal::minimum_degree_ordering(C, m_P);
+    internal::minimum_degree_ordering(C, m_Pinv);
  }
 
-  if(m_P.size()>0)
-    m_Pinv = m_P.inverse();
+  if(m_Pinv.size()>0)
+    m_P = m_Pinv.inverse();
  else
-    m_Pinv.resize(0);
+    m_P.resize(0);
 
  ap.resize(size,size);
-  ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_Pinv);
+  ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
}
|
||||
template<typename Derived>
|
||||
|
||||
@@ -372,7 +372,7 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
const typename SparseDenseProductReturnType<Derived,OtherDerived>::Type
operator*(const MatrixBase<OtherDerived> &other) const;

/** \returns an expression of P^-1 H P */
/** \returns an expression of P H P^-1 where H is the matrix represented by \c *this */
SparseSymmetricPermutationProduct<Derived,Upper|Lower> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
{
return SparseSymmetricPermutationProduct<Derived,Upper|Lower>(derived(), perm);

@@ -125,7 +125,7 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
_dest = tmp;
}

/** \returns an expression of P^-1 H P */
/** \returns an expression of P H P^-1 */
SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
{
return SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo>(m_matrix, perm);

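As an illustration of the corrected twistedBy() documentation, a short sketch, with illustrative names, that forms P H P^-1 from the lower triangle of a sparse matrix; the assignment pattern mirrors the one used in the factorization code above:

#include <Eigen/Sparse>

// Sketch: only the lower triangle of A is read; the result holds P * A * P^-1.
void permute_selfadjoint(const Eigen::SparseMatrix<double>& A,
                         const Eigen::PermutationMatrix<Eigen::Dynamic>& P,
                         Eigen::SparseMatrix<double>& result)
{
  result.resize(A.rows(), A.cols());
  result.selfadjointView<Eigen::Upper>() =
      A.selfadjointView<Eigen::Lower>().twistedBy(P);
}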
@@ -36,7 +36,7 @@
# define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
# endif
# include <windows.h>
#elif __APPLE__
#elif defined(__APPLE__)
#include <CoreServices/CoreServices.h>
#include <mach/mach_time.h>
#else

@@ -5,6 +5,7 @@ axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000
axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000
matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:3000
matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:3000
trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:3000
trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:3000
trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:3000
cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:3000

@@ -38,6 +38,7 @@ source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix
# source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trmm $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trisolve_vector $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trisolve_matrix $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix

@@ -195,16 +195,16 @@ public :
}

static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){
X = L.template triangularView<Lower>().solve(B);
X = L.template triangularView<Upper>().solve(B);
}

static inline void trmm(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){
X = L.template triangularView<Lower>() * B;
X.noalias() = L.template triangularView<Lower>() * B;
}

static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
C = X;
internal::llt_inplace<Lower>::blocked(C);
internal::llt_inplace<real,Lower>::blocked(C);
//C = X.llt().matrixL();
// C = X;
// Cholesky<gene_matrix>::computeInPlace(C);

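The noalias() added to trmm above deserves a brief aside. A sketch of the same call in user code, assuming X does not alias L or B, so Eigen may write the product directly into the destination rather than through a protective temporary:

#include <Eigen/Dense>

// Sketch: noalias() promises the destination is distinct from the operands,
// letting Eigen evaluate the triangular product straight into X.
void trmm_example(const Eigen::MatrixXd& L, const Eigen::MatrixXd& B, Eigen::MatrixXd& X)
{
  X.noalias() = L.triangularView<Eigen::Lower>() * B;
}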
@@ -81,7 +81,7 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal
int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
{
// std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n";
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex);
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking<Scalar,Scalar>&);
static functype func[32];

static bool init = false;
@@ -143,11 +143,17 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
return xerbla_(SCALAR_SUFFIX_UP"TRSM ",&info,6);

int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);

if(SIDE(*side)==LEFT)
func[code](*m, *n, a, *lda, b, *ldb);
{
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m);
func[code](*m, *n, a, *lda, b, *ldb, blocking);
}
else
func[code](*n, *m, a, *lda, b, *ldb);
{
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n);
func[code](*n, *m, a, *lda, b, *ldb, blocking);
}

if(alpha!=Scalar(1))
matrix(b,*m,*n,*ldb) *= alpha;

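For context on the kernels being retyped above, a sketch of what TRSM computes in the left-sided, lower-triangular case, expressed with Eigen's public API rather than the internal function table; the function name trsm_like is illustrative:

#include <Eigen/Dense>

// Sketch: solve L * X = alpha * B, overwriting B with X, as TRSM does.
void trsm_like(const Eigen::MatrixXd& L, double alpha, Eigen::MatrixXd& B)
{
  L.triangularView<Eigen::Lower>().solveInPlace(B);
  if (alpha != 1.0)
    B *= alpha;   // the BLAS entry point applies the same scaling at the end
}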
@@ -1,6 +1,6 @@
namespace Eigen {

/** \page TopicVectorization Vectorizaion
/** \page TopicVectorization Vectorization


TODO: write this dox page!

46
doc/TopicMultithreading.dox
Normal file
@@ -0,0 +1,46 @@
namespace Eigen {

/** \page TopicMultiThreading Eigen and multi-threading

\section TopicMultiThreading_MakingEigenMT Make Eigen run in parallel

Some of Eigen's algorithms can exploit the multiple cores present in your hardware. To this end, it is enough to enable OpenMP on your compiler, for instance:
* GCC: \c -fopenmp
* ICC: \c -openmp
* MSVC: check the respective option in the build properties.
You can control the number of threads that will be used through either the OpenMP API or Eigen's API, with the following priority:
\code
OMP_NUM_THREADS=n ./my_program
omp_set_num_threads(n);
Eigen::setNbThreads(n);
\endcode
Unless setNbThreads has been called, Eigen uses the number of threads specified by OpenMP. You can restore this behavior by calling \code setNbThreads(0); \endcode
You can query the number of threads that will be used with:
\code
n = Eigen::nbThreads();
\endcode
You can disable Eigen's multithreading at compile time by defining the EIGEN_DONT_PARALLELIZE preprocessor token.

Currently, the following algorithms can make use of multi-threading:
* general matrix - matrix products
* PartialPivLU

\section TopicMultiThreading_UsingEigenWithMT Using Eigen in a multi-threaded application

If your own application is multithreaded, and multiple threads make calls to Eigen, then you have to initialize Eigen by calling the following routine \b before creating the threads:
\code
#include <Eigen/Core>

int main(int argc, char** argv)
{
  Eigen::initParallel();

  ...
}
\endcode

If your application is parallelized with OpenMP, you might want to disable Eigen's own parallelization as detailed in the previous section.

*/

}
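A self-contained sketch combining the calls described in this page; the thread count and matrix sizes are arbitrary choices, and OpenMP must be enabled at compile time (e.g. g++ -fopenmp):

#include <Eigen/Core>
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::initParallel();     // required if the application itself spawns threads
  Eigen::setNbThreads(4);    // takes precedence over OMP_NUM_THREADS
  std::cout << "Eigen will use " << Eigen::nbThreads() << " threads\n";

  Eigen::MatrixXd a = Eigen::MatrixXd::Random(512, 512);
  Eigen::MatrixXd b = Eigen::MatrixXd::Random(512, 512);
  Eigen::MatrixXd c = a * b; // the general matrix-matrix product can run in parallel
  std::cout << c.norm() << "\n";
  return 0;
}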
@@ -18,13 +18,15 @@ set(LAPACK_FOUND TRUE)
set(BLAS_LIBRARIES eigen_blas)
set(LAPACK_LIBRARIES eigen_lapack)

if(TEST_REAL_CASES)
set(EIGEN_TEST_MATRIX_DIR "" CACHE STRING "Enable testing of real-world sparse matrices contained in the specified path")
if(EIGEN_TEST_MATRIX_DIR)
if(NOT WIN32)
add_definitions( -DTEST_REAL_CASES="${TEST_REAL_CASES}" )
message(STATUS "Test real-world sparse matrices: ${EIGEN_TEST_MATRIX_DIR}")
add_definitions( -DTEST_REAL_CASES="${EIGEN_TEST_MATRIX_DIR}" )
else(NOT WIN32)
message(STATUS, "REAL CASES CAN NOT BE CURRENTLY TESTED ON WIN32")
message(STATUS "REAL CASES CANNOT CURRENTLY BE TESTED ON WIN32")
endif(NOT WIN32)
endif(TEST_REAL_CASES)
endif(EIGEN_TEST_MATRIX_DIR)

set(SPARSE_LIBS " ")

@@ -129,13 +129,20 @@ void ctms_decompositions()
0,
maxSize, maxSize> ComplexMatrix;

const Matrix A(Matrix::Random(size, size));
const Matrix A(Matrix::Random(size, size)), B(Matrix::Random(size, size));
Matrix X(size,size);
const ComplexMatrix complexA(ComplexMatrix::Random(size, size));
const Matrix saA = A.adjoint() * A;
const Vector b(Vector::Random(size));
Vector x(size);

// Cholesky module
Eigen::LLT<Matrix> LLT; LLT.compute(A);
X = LLT.solve(B);
x = LLT.solve(b);
Eigen::LDLT<Matrix> LDLT; LDLT.compute(A);
X = LDLT.solve(B);
x = LDLT.solve(b);

// Eigenvalues module
Eigen::HessenbergDecomposition<ComplexMatrix> hessDecomp; hessDecomp.compute(complexA);
@@ -147,12 +154,22 @@ void ctms_decompositions()

// LU module
Eigen::PartialPivLU<Matrix> ppLU; ppLU.compute(A);
X = ppLU.solve(B);
x = ppLU.solve(b);
Eigen::FullPivLU<Matrix> fpLU; fpLU.compute(A);
X = fpLU.solve(B);
x = fpLU.solve(b);

// QR module
Eigen::HouseholderQR<Matrix> hQR; hQR.compute(A);
X = hQR.solve(B);
x = hQR.solve(b);
Eigen::ColPivHouseholderQR<Matrix> cpQR; cpQR.compute(A);
X = cpQR.solve(B);
x = cpQR.solve(b);
Eigen::FullPivHouseholderQR<Matrix> fpQR; fpQR.compute(A);
X = fpQR.solve(B);
x = fpQR.solve(b);

// SVD module
Eigen::JacobiSVD<Matrix> jSVD; jSVD.compute(A, ComputeFullU | ComputeFullV);
