Compare commits

...

19 Commits

Author SHA1 Message Date
Gael Guennebaud
4ca5735de4 bump to 3.1.0-rc1 2012-06-14 21:25:50 +02:00
Gael Guennebaud
b9f25ee656 bug #466: better fix for the race condition: this new patch add an initParallel()
function which must be called at the initialization time of any multi-threaded
application calling Eigen from multiple threads.
2012-06-14 14:24:15 +02:00
Gael Guennebaud
a3e700db72 fix bug #475: .exp() now returns +inf when overflow occurs (SSE) 2012-06-14 10:38:39 +02:00
Gael Guennebaud
324ecf153b disable the MKL's vm*powx functions on windows 2012-06-14 09:49:57 +02:00
Gael Guennebaud
9c7b62415a simplify and clean a bit the Pastix support module 2012-06-12 16:47:14 +02:00
Gael Guennebaud
4e8523b835 update blas interface for trsm 2012-06-12 14:33:03 +02:00
Gael Guennebaud
88e051019b extend nomalloc unit test to test the solve calls 2012-06-12 13:12:47 +02:00
Gael Guennebaud
cd48254a87 fix inclusion order 2012-06-12 11:40:33 +02:00
Gael Guennebaud
924c7a9300 avoid dynamic allocation for fixed size triangular solving 2012-06-12 11:33:50 +02:00
Gael Guennebaud
bc580bbffb fix typo 2012-06-11 18:49:30 +02:00
Gael Guennebaud
f2849fac20 Fix bug #466: race condition destected by helgrind in manage_caching_sizes.
After all, the solution based on threadprivate is not that costly.
2012-06-08 17:29:02 +02:00
Gael Guennebaud
28d0a8580e workaround ICC 11.1 compilation issue 2012-06-08 14:13:28 +02:00
Gael Guennebaud
7e36d32b32 fix ambiguous calls in the functors by prefixing function calls with internal:: 2012-06-08 09:53:50 +02:00
Gael Guennebaud
5cec86cb1e BTL: add missing TRMM plots, update Eigen's interface 2012-06-07 18:35:38 +02:00
Gael Guennebaud
512e0b151b clean the support for testing existing sparse problems 2012-06-07 18:31:09 +02:00
Gael Guennebaud
83c932ed15 fix a warning 2012-06-07 18:22:13 +02:00
Gael Guennebaud
1e5e66b642 For consistency, Simplicial* now factorizes P A P^-1 (instead of P^-1 A P).
Document how is applied the permutation in Simplicial* .
2012-06-07 16:24:46 +02:00
Gael Guennebaud
63c6ab3e42 fix documentaion of twistedBy 2012-06-07 16:18:00 +02:00
Gael Guennebaud
c1edb7fd95 Added tag 3.1.0-beta1 for changeset b7a7285909 2012-06-06 22:34:08 +02:00
25 changed files with 406 additions and 316 deletions

View File

@@ -338,12 +338,9 @@ if(EIGEN_BUILD_BTL)
add_subdirectory(bench/btl EXCLUDE_FROM_ALL)
endif(EIGEN_BUILD_BTL)
if(TEST_REAL_CASES)
if(NOT WIN32)
add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
set(ENV(EIGEN_MATRIX_DIR) ${TEST_REAL_CASES})
endif(NOT WIN32)
endif(TEST_REAL_CASES)
if(NOT WIN32)
add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
endif(NOT WIN32)
ei_testing_print_summary()

View File

@@ -329,12 +329,12 @@ using std::ptrdiff_t;
#include "src/Core/GeneralProduct.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/SolveTriangular.h"
#include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/Parallelizer.h"
#include "src/Core/products/CoeffBasedProduct.h"
#include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/GeneralMatrixVector.h"
#include "src/Core/products/GeneralMatrixMatrix.h"
#include "src/Core/SolveTriangular.h"
#include "src/Core/products/GeneralMatrixMatrixTriangular.h"
#include "src/Core/products/SelfadjointMatrixVector.h"
#include "src/Core/products/SelfadjointMatrixMatrix.h"

View File

@@ -209,10 +209,13 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
// The vm*powx functions are not avaibale in the windows version of MKL.
#ifdef _WIN32
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
#endif
} // end namespace internal

View File

@@ -295,7 +295,7 @@ struct functor_traits<scalar_opposite_op<Scalar> >
template<typename Scalar> struct scalar_abs_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return abs(a); }
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return internal::abs(a); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pabs(a); }
@@ -317,7 +317,7 @@ struct functor_traits<scalar_abs_op<Scalar> >
template<typename Scalar> struct scalar_abs2_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return abs2(a); }
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return internal::abs2(a); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pmul(a,a); }
@@ -333,7 +333,7 @@ struct functor_traits<scalar_abs2_op<Scalar> >
*/
template<typename Scalar> struct scalar_conjugate_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return conj(a); }
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return internal::conj(a); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
};
@@ -370,7 +370,7 @@ template<typename Scalar>
struct scalar_real_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return real(a); }
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return internal::real(a); }
};
template<typename Scalar>
struct functor_traits<scalar_real_op<Scalar> >
@@ -385,7 +385,7 @@ template<typename Scalar>
struct scalar_imag_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return imag(a); }
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return internal::imag(a); }
};
template<typename Scalar>
struct functor_traits<scalar_imag_op<Scalar> >
@@ -400,7 +400,7 @@ template<typename Scalar>
struct scalar_real_ref_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return real_ref(*const_cast<Scalar*>(&a)); }
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return internal::real_ref(*const_cast<Scalar*>(&a)); }
};
template<typename Scalar>
struct functor_traits<scalar_real_ref_op<Scalar> >
@@ -415,7 +415,7 @@ template<typename Scalar>
struct scalar_imag_ref_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return imag_ref(*const_cast<Scalar*>(&a)); }
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return internal::imag_ref(*const_cast<Scalar*>(&a)); }
};
template<typename Scalar>
struct functor_traits<scalar_imag_ref_op<Scalar> >
@@ -429,7 +429,7 @@ struct functor_traits<scalar_imag_ref_op<Scalar> >
*/
template<typename Scalar> struct scalar_exp_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
inline const Scalar operator() (const Scalar& a) const { return exp(a); }
inline const Scalar operator() (const Scalar& a) const { return internal::exp(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
};
@@ -445,7 +445,7 @@ struct functor_traits<scalar_exp_op<Scalar> >
*/
template<typename Scalar> struct scalar_log_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
inline const Scalar operator() (const Scalar& a) const { return log(a); }
inline const Scalar operator() (const Scalar& a) const { return internal::log(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
};
@@ -703,7 +703,7 @@ struct functor_traits<scalar_add_op<Scalar> >
*/
template<typename Scalar> struct scalar_sqrt_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
inline const Scalar operator() (const Scalar& a) const { return sqrt(a); }
inline const Scalar operator() (const Scalar& a) const { return internal::sqrt(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
};
@@ -721,7 +721,7 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
*/
template<typename Scalar> struct scalar_cos_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
inline Scalar operator() (const Scalar& a) const { return cos(a); }
inline Scalar operator() (const Scalar& a) const { return internal::cos(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
};
@@ -740,7 +740,7 @@ struct functor_traits<scalar_cos_op<Scalar> >
*/
template<typename Scalar> struct scalar_sin_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
inline const Scalar operator() (const Scalar& a) const { return sin(a); }
inline const Scalar operator() (const Scalar& a) const { return internal::sin(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
};
@@ -760,7 +760,7 @@ struct functor_traits<scalar_sin_op<Scalar> >
*/
template<typename Scalar> struct scalar_tan_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
inline const Scalar operator() (const Scalar& a) const { return tan(a); }
inline const Scalar operator() (const Scalar& a) const { return internal::tan(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
};
@@ -779,7 +779,7 @@ struct functor_traits<scalar_tan_op<Scalar> >
*/
template<typename Scalar> struct scalar_acos_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
inline const Scalar operator() (const Scalar& a) const { return acos(a); }
inline const Scalar operator() (const Scalar& a) const { return internal::acos(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
};
@@ -798,7 +798,7 @@ struct functor_traits<scalar_acos_op<Scalar> >
*/
template<typename Scalar> struct scalar_asin_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
inline const Scalar operator() (const Scalar& a) const { return asin(a); }
inline const Scalar operator() (const Scalar& a) const { return internal::asin(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
};

View File

@@ -100,12 +100,22 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
typedef typename Rhs::Index Index;
typedef blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
static void run(const Lhs& lhs, Rhs& rhs)
{
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
const Index size = lhs.rows();
const Index othersize = Side==OnTheLeft? rhs.cols() : rhs.rows();
typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType;
BlockingType blocking(rhs.rows(), rhs.cols(), size);
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride(), blocking);
}
};

View File

@@ -123,7 +123,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
@@ -170,7 +170,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
y = pmadd(y, z, x);
y = padd(y, p4f_1);
/* build 2^n */
// build 2^n
emm0 = _mm_cvttps_epi32(fx);
emm0 = _mm_add_epi32(emm0, p4i_0x7f);
emm0 = _mm_slli_epi32(emm0, 23);

View File

@@ -26,7 +26,7 @@
#define EIGEN_GENERAL_BLOCK_PANEL_H
namespace Eigen {
namespace internal {
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
@@ -42,9 +42,14 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff
/** \internal */
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
{
static std::ptrdiff_t m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
static std::ptrdiff_t m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
static std::ptrdiff_t m_l1CacheSize = 0;
static std::ptrdiff_t m_l2CacheSize = 0;
if(m_l2CacheSize==0)
{
m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
}
if(action==SetAction)
{
// set the cpu cache size and cache all block sizes from a global cache size in byte

View File

@@ -79,7 +79,7 @@ static void run(Index rows, Index cols, Index depth,
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
Index kc = blocking.kc(); // cache block size along the K direction
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
//Index nc = blocking.nc(); // cache block size along the N direction
@@ -249,7 +249,7 @@ struct gemm_functor
BlockingType& m_blocking;
};
template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth,
template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
template<typename _LhsScalar, typename _RhsScalar>
@@ -282,8 +282,8 @@ class level3_blocking
inline RhsScalar* blockW() { return m_blockW; }
};
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true>
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true>
: public level3_blocking<
typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -324,8 +324,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
inline void allocateAll() {}
};
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false>
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
: public level3_blocking<
typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -349,7 +349,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
this->m_nc = Transpose ? rows : cols;
this->m_kc = depth;
computeProductBlockingSizes<LhsScalar,RhsScalar>(this->m_kc, this->m_mc, this->m_nc);
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc);
m_sizeA = this->m_mc * this->m_kc;
m_sizeB = this->m_kc * this->m_nc;
m_sizeW = this->m_kc*Traits::WorkSpaceFactor;

View File

@@ -57,12 +57,23 @@ inline void manage_multi_threading(Action action, int* v)
}
}
}
/** Must be call first when calling Eigen from multiple threads */
inline void initParallel()
{
int nbt;
internal::manage_multi_threading(GetAction, &nbt);
std::ptrdiff_t l1, l2;
internal::manage_caching_sizes(GetAction, &l1, &l2);
}
/** \returns the max number of threads reserved for Eigen
* \sa setNbThreads */
inline int nbThreads()
{
int ret;
manage_multi_threading(GetAction, &ret);
internal::manage_multi_threading(GetAction, &ret);
return ret;
}
@@ -70,9 +81,11 @@ inline int nbThreads()
* \sa nbThreads */
inline void setNbThreads(int v)
{
manage_multi_threading(SetAction, &v);
internal::manage_multi_threading(SetAction, &v);
}
namespace internal {
template<typename Index> struct GemmParallelInfo
{
GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
@@ -121,6 +134,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
if(threads==1)
return func(0,rows, 0,cols);
Eigen::initParallel();
func.initParallelSession();
if(transpose)

View File

@@ -36,14 +36,15 @@ struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,
static EIGEN_DONT_INLINE void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride)
Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
triangular_solve_matrix<
Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
(Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
NumTraits<Scalar>::IsComplex && Conjugate,
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor>
::run(size, cols, tri, triStride, _other, otherStride);
::run(size, cols, tri, triStride, _other, otherStride, blocking);
}
};
@@ -55,7 +56,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride)
Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index cols = otherSize;
const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride);
@@ -67,17 +69,16 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
IsLower = (Mode&Lower) == Lower
};
Index kc = size; // cache block size along the K direction
Index mc = size; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB + sizeW;
Scalar* blockW = allocatedBlockB;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
conj_if<Conjugate> conj;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
@@ -181,7 +182,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
{
pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1));
gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW);
}
}
}
@@ -197,7 +198,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride)
Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index rows = otherSize;
const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride);
@@ -210,19 +212,16 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
IsLower = (Mode&Lower) == Lower
};
// Index kc = std::min<Index>(Traits::Max_kc/4,size); // cache block size along the K direction
// Index mc = std::min<Index>(Traits::Max_mc,size); // cache block size along the M direction
// check that !!!!
Index kc = size; // cache block size along the K direction
Index mc = size; // cache block size along the M direction
Index nc = rows; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*size;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*size;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB + sizeW;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
conj_if<Conjugate> conj;
gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
@@ -289,7 +288,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
Scalar(-1),
actual_kc, actual_kc, // strides
panelOffset, panelOffset, // offsets
allocatedBlockB); // workspace
blockW); // workspace
}
// unblocked triangular solve
@@ -320,7 +319,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
if (rs>0)
gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb,
actual_mc, actual_kc, rs, Scalar(-1),
-1, -1, 0, 0, allocatedBlockB);
-1, -1, 0, 0, blockW);
}
}
}

View File

@@ -28,7 +28,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 0
#define EIGEN_MINOR_VERSION 93
#define EIGEN_MINOR_VERSION 94
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \

View File

@@ -468,15 +468,40 @@ public:
{
return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other);
}
#ifdef __INTEL_COMPILER
private:
// this intermediate structure permits to workaround a bug in ICC 11:
// error: template instantiation resulted in unexpected function type of "Eigen::Transform<double, 3, 32, 0>
// (const Eigen::Transform<double, 3, 2, 0> &) const"
// (the meaning of a name may have changed since the template declaration -- the type of the template is:
// "Eigen::internal::transform_transform_product_impl<Eigen::Transform<double, 3, 32, 0>,
// Eigen::Transform<double, 3, Mode, Options>, <expression>>::ResultType (const Eigen::Transform<double, 3, Mode, Options> &) const")
//
template<int OtherMode,int OtherOptions> struct icc_11_workaround
{
typedef internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> > ProductType;
typedef typename ProductType::ResultType ResultType;
};
public:
/** Concatenates two different transformations */
template<int OtherMode,int OtherOptions>
inline const typename internal::transform_transform_product_impl<
Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
inline typename icc_11_workaround<OtherMode,OtherOptions>::ResultType
operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
{
typedef typename icc_11_workaround<OtherMode,OtherOptions>::ProductType ProductType;
return ProductType::run(*this,other);
}
#else
/** Concatenates two different transformations */
template<int OtherMode,int OtherOptions>
inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
{
return internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::run(*this,other);
}
#endif
/** \sa MatrixBase::setIdentity() */
void setIdentity() { m_matrix.setIdentity(); }

View File

@@ -35,7 +35,6 @@ namespace Eigen {
*
* \sa TutorialSparseDirectSolvers
*/
template<typename _MatrixType, bool IsStrSym = false> class PastixLU;
template<typename _MatrixType, int Options> class PastixLLT;
template<typename _MatrixType, int Options> class PastixLDLT;
@@ -75,32 +74,34 @@ namespace internal
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
{
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) x = NULL;
if (nbrhs == 0) {x = NULL; nbrhs=1;}
s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
}
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
{
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) x = NULL;
if (nbrhs == 0) {x = NULL; nbrhs=1;}
d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
}
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
{
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;}
c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<COMPLEX*>(vals), perm, invp, reinterpret_cast<COMPLEX*>(x), nbrhs, iparm, dparm);
}
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
{
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) x = NULL;
if (nbrhs == 0) {x = NULL; nbrhs=1;}
z_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<DCOMPLEX*>(vals), perm, invp, reinterpret_cast<DCOMPLEX*>(x), nbrhs, iparm, dparm);
}
// Convert the matrix to Fortran-style Numbering
template <typename MatrixType>
void EigenToFortranNumbering (MatrixType& mat)
void c_to_fortran_numbering (MatrixType& mat)
{
if ( !(mat.outerIndexPtr()[0]) )
{
@@ -114,7 +115,7 @@ namespace internal
// Convert to C-style Numbering
template <typename MatrixType>
void EigenToCNumbering (MatrixType& mat)
void fortran_to_c_numbering (MatrixType& mat)
{
// Check the Numbering
if ( mat.outerIndexPtr()[0] == 1 )
@@ -126,38 +127,12 @@ namespace internal
--mat.innerIndexPtr()[i];
}
}
// Symmetrize the graph of the input matrix
// In : The Input matrix to symmetrize the pattern
// Out : The output matrix
// StrMatTrans : The structural pattern of the transpose of In; It is
// used to optimize the future symmetrization with the same matrix pattern
// WARNING It is assumed here that successive calls to this routine are done
// with matrices having the same pattern.
template <typename MatrixType>
void EigenSymmetrizeMatrixGraph (const MatrixType& In, MatrixType& Out, MatrixType& StrMatTrans, bool& hasTranspose)
{
eigen_assert(In.cols()==In.rows() && " Can only symmetrize the graph of a square matrix");
if (!hasTranspose)
{ //First call to this routine, need to compute the structural pattern of In^T
StrMatTrans = In.transpose();
// Set the elements of the matrix to zero
for (int i = 0; i < StrMatTrans.rows(); i++)
{
for (typename MatrixType::InnerIterator it(StrMatTrans, i); it; ++it)
it.valueRef() = 0.0;
}
hasTranspose = true;
}
Out = (StrMatTrans + In).eval();
}
}
// This is the base class to interface with PaStiX functions.
// Users should not used this class directly.
template <class Derived>
class PastixBase
class PastixBase : internal::noncopyable
{
public:
typedef typename internal::pastix_traits<Derived>::MatrixType _MatrixType;
@@ -166,29 +141,19 @@ class PastixBase
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
typedef Matrix<Scalar,Dynamic,1> Vector;
typedef SparseMatrix<Scalar, ColMajor> ColSpMatrix;
public:
PastixBase():m_initisOk(false),m_analysisIsOk(false),m_factorizationIsOk(false),m_isInitialized(false)
PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false), m_pastixdata(0), m_size(0)
{
m_pastixdata = 0;
m_hasTranspose = false;
PastixInit();
init();
}
~PastixBase()
{
PastixDestroy();
clean();
}
// Initialize the Pastix data structure, check the matrix
void PastixInit();
// Compute the ordering and the symbolic factorization
Derived& analyzePattern (MatrixType& mat);
// Compute the numerical factorization
Derived& factorize (MatrixType& mat);
/** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
*
@@ -269,7 +234,6 @@ class PastixBase
/** Return a reference to a particular index parameter of the DPARM vector
* \sa dparm()
*/
double& dparm(int idxparam)
{
return m_dparm(idxparam);
@@ -307,17 +271,27 @@ class PastixBase
}
protected:
// Initialize the Pastix data structure, check the matrix
void init();
// Compute the ordering and the symbolic factorization
void analyzePattern(ColSpMatrix& mat);
// Compute the numerical factorization
void factorize(ColSpMatrix& mat);
// Free all the data allocated by Pastix
void PastixDestroy()
void clean()
{
eigen_assert(m_initisOk && "The Pastix structure should be allocated first");
m_iparm(IPARM_START_TASK) = API_TASK_CLEAN;
m_iparm(IPARM_END_TASK) = API_TASK_CLEAN;
internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, m_mat_null.outerIndexPtr(), m_mat_null.innerIndexPtr(),
m_mat_null.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 1, m_iparm.data(), m_dparm.data());
internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, 0, 0, (Scalar*)0,
m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());
}
Derived& compute (MatrixType& mat);
void compute(ColSpMatrix& mat);
int m_initisOk;
int m_analysisIsOk;
@@ -325,22 +299,12 @@ class PastixBase
bool m_isInitialized;
mutable ComputationInfo m_info;
mutable pastix_data_t *m_pastixdata; // Data structure for pastix
mutable SparseMatrix<Scalar, ColMajor> m_mat_null; // An input null matrix
mutable Matrix<Scalar, Dynamic,1> m_vec_null; // An input null vector
mutable SparseMatrix<Scalar, ColMajor> m_StrMatTrans; // The transpose pattern of the input matrix
mutable bool m_hasTranspose; // The transpose of the current matrix has already been computed
mutable int m_comm; // The MPI communicator identifier
mutable Matrix<Index,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
mutable Matrix<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
mutable Matrix<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters
mutable Matrix<Index,Dynamic,1> m_perm; // Permutation vector
mutable Matrix<Index,Dynamic,1> m_invp; // Inverse permutation vector
mutable int m_ordering; // ordering method to use
mutable int m_amalgamation; // level of amalgamation
mutable int m_size; // Size of the matrix
private:
PastixBase(PastixBase& ) {}
};
/** Initialize the PaStiX data structure.
@@ -348,29 +312,29 @@ class PastixBase
* \sa iparm() dparm()
*/
template <class Derived>
void PastixBase<Derived>::PastixInit()
void PastixBase<Derived>::init()
{
m_size = 0;
m_iparm.resize(IPARM_SIZE);
m_dparm.resize(DPARM_SIZE);
m_iparm.setZero(IPARM_SIZE);
m_dparm.setZero(DPARM_SIZE);
m_iparm(IPARM_MODIFY_PARAMETER) = API_NO;
if(m_pastixdata)
{ // This trick is used to reset the Pastix internal data between successive
// calls with (structural) different matrices
PastixDestroy();
m_pastixdata = 0;
m_iparm(IPARM_MODIFY_PARAMETER) = API_YES;
m_hasTranspose = false;
}
pastix(&m_pastixdata, MPI_COMM_WORLD,
0, 0, 0, 0,
0, 0, 0, 1, m_iparm.data(), m_dparm.data());
m_iparm[IPARM_MATRIX_VERIFICATION] = API_NO;
m_iparm[IPARM_VERBOSE] = 2;
m_iparm[IPARM_ORDERING] = API_ORDER_SCOTCH;
m_iparm[IPARM_INCOMPLETE] = API_NO;
m_iparm[IPARM_OOC_LIMIT] = 2000;
m_iparm[IPARM_RHS_MAKING] = API_RHS_B;
m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
m_iparm(IPARM_START_TASK) = API_TASK_INIT;
m_iparm(IPARM_END_TASK) = API_TASK_INIT;
m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, m_mat_null.outerIndexPtr(), m_mat_null.innerIndexPtr(),
m_mat_null.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 1, m_iparm.data(), m_dparm.data());
m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, 0, 0, (Scalar*)0,
0, 0, 0, 0, m_iparm.data(), m_dparm.data());
// Check the returned error
if(m_iparm(IPARM_ERROR_NUMBER)) {
@@ -384,82 +348,74 @@ void PastixBase<Derived>::PastixInit()
}
template <class Derived>
Derived& PastixBase<Derived>::compute(MatrixType& mat)
void PastixBase<Derived>::compute(ColSpMatrix& mat)
{
eigen_assert(mat.rows() == mat.cols() && "The input matrix should be squared");
typedef typename MatrixType::Scalar Scalar;
// Save the size of the current matrix
m_size = mat.rows();
// Convert the matrix in fortran-style numbering
internal::EigenToFortranNumbering(mat);
analyzePattern(mat);
analyzePattern(mat);
factorize(mat);
m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
if (m_factorizationIsOk) m_isInitialized = true;
//Convert back the matrix -- Is it really necessary here
internal::EigenToCNumbering(mat);
return derived();
m_isInitialized = m_factorizationIsOk;
}
template <class Derived>
Derived& PastixBase<Derived>::analyzePattern(MatrixType& mat)
{
eigen_assert(m_initisOk && "PastixInit should be called first to set the default parameters");
void PastixBase<Derived>::analyzePattern(ColSpMatrix& mat)
{
eigen_assert(m_initisOk && "The initialization of PaSTiX failed");
// clean previous calls
if(m_size>0)
clean();
m_size = mat.rows();
m_perm.resize(m_size);
m_invp.resize(m_size);
// Convert the matrix in fortran-style numbering
internal::EigenToFortranNumbering(mat);
m_iparm(IPARM_START_TASK) = API_TASK_ORDERING;
m_iparm(IPARM_END_TASK) = API_TASK_ANALYSE;
internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, m_size, mat.outerIndexPtr(), mat.innerIndexPtr(),
mat.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 0, m_iparm.data(), m_dparm.data());
mat.valuePtr(), m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());
// Check the returned error
if(m_iparm(IPARM_ERROR_NUMBER)) {
if(m_iparm(IPARM_ERROR_NUMBER))
{
m_info = NumericalIssue;
m_analysisIsOk = false;
}
else {
else
{
m_info = Success;
m_analysisIsOk = true;
}
return derived();
}
template <class Derived>
Derived& PastixBase<Derived>::factorize(MatrixType& mat)
void PastixBase<Derived>::factorize(ColSpMatrix& mat)
{
// if(&m_cpyMat != &mat) m_cpyMat = mat;
eigen_assert(m_analysisIsOk && "The analysis phase should be called before the factorization phase");
m_iparm(IPARM_START_TASK) = API_TASK_NUMFACT;
m_iparm(IPARM_END_TASK) = API_TASK_NUMFACT;
m_size = mat.rows();
// Convert the matrix in fortran-style numbering
internal::EigenToFortranNumbering(mat);
internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, m_size, mat.outerIndexPtr(), mat.innerIndexPtr(),
mat.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 0, m_iparm.data(), m_dparm.data());
mat.valuePtr(), m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());
// Check the returned error
if(m_iparm(IPARM_ERROR_NUMBER)) {
if(m_iparm(IPARM_ERROR_NUMBER))
{
m_info = NumericalIssue;
m_factorizationIsOk = false;
m_isInitialized = false;
}
else {
else
{
m_info = Success;
m_factorizationIsOk = true;
m_isInitialized = true;
}
return derived();
}
/* Solve the system */
@@ -475,20 +431,17 @@ bool PastixBase<Base>::_solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) co
x = b; /* on return, x is overwritten by the computed solution */
for (int i = 0; i < b.cols(); i++){
m_iparm(IPARM_START_TASK) = API_TASK_SOLVE;
m_iparm(IPARM_END_TASK) = API_TASK_REFINE;
m_iparm(IPARM_RHS_MAKING) = API_RHS_B;
internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, x.rows(), m_mat_null.outerIndexPtr(), m_mat_null.innerIndexPtr(),
m_mat_null.valuePtr(), m_perm.data(), m_invp.data(), &x(0, i), rhs, m_iparm.data(), m_dparm.data());
m_iparm[IPARM_START_TASK] = API_TASK_SOLVE;
m_iparm[IPARM_END_TASK] = API_TASK_REFINE;
internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, x.rows(), 0, 0, 0,
m_perm.data(), m_invp.data(), &x(0, i), rhs, m_iparm.data(), m_dparm.data());
}
// Check the returned error
if(m_iparm(IPARM_ERROR_NUMBER)) {
m_info = NumericalIssue;
return false;
}
else {
return true;
}
m_info = m_iparm(IPARM_ERROR_NUMBER)==0 ? Success : NumericalIssue;
return m_iparm(IPARM_ERROR_NUMBER)==0;
}
/** \ingroup PaStiXSupport_Module
@@ -516,14 +469,18 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
public:
typedef _MatrixType MatrixType;
typedef PastixBase<PastixLU<MatrixType> > Base;
typedef typename MatrixType::Scalar Scalar;
typedef SparseMatrix<Scalar, ColMajor> PaStiXType;
typedef typename Base::ColSpMatrix ColSpMatrix;
typedef typename MatrixType::Index Index;
public:
PastixLU():Base() {}
PastixLU() : Base()
{
init();
}
PastixLU(const MatrixType& matrix):Base()
{
init();
compute(matrix);
}
/** Compute the LU supernodal factorization of \p matrix.
@@ -533,18 +490,9 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
*/
void compute (const MatrixType& matrix)
{
// Pastix supports only column-major matrices with a symmetric pattern
Base::PastixInit();
PaStiXType temp(matrix.rows(), matrix.cols());
// Symmetrize the graph of the matrix
if (IsStrSym)
temp = matrix;
else
{
internal::EigenSymmetrizeMatrixGraph<PaStiXType>(matrix, temp, m_StrMatTrans, m_hasTranspose);
}
m_iparm[IPARM_SYM] = API_SYM_NO;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
m_structureIsUptodate = false;
ColSpMatrix temp;
grabMatrix(matrix, temp);
Base::compute(temp);
}
/** Compute the LU symbolic factorization of \p matrix using its sparsity pattern.
@@ -554,20 +502,9 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
*/
void analyzePattern(const MatrixType& matrix)
{
Base::PastixInit();
/* Pastix supports only column-major matrices with symmetrized patterns */
SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
// Symmetrize the graph of the matrix
if (IsStrSym)
temp = matrix;
else
{
internal::EigenSymmetrizeMatrixGraph<PaStiXType>(matrix, temp, m_StrMatTrans,m_hasTranspose);
}
m_iparm(IPARM_SYM) = API_SYM_NO;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
m_structureIsUptodate = false;
ColSpMatrix temp;
grabMatrix(matrix, temp);
Base::analyzePattern(temp);
}
@@ -578,27 +515,48 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
*/
void factorize(const MatrixType& matrix)
{
/* Pastix supports only column-major matrices with symmetrized patterns */
SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
// Symmetrize the graph of the matrix
if (IsStrSym)
temp = matrix;
else
{
internal::EigenSymmetrizeMatrixGraph<PaStiXType>(matrix, temp, m_StrMatTrans,m_hasTranspose);
}
m_iparm(IPARM_SYM) = API_SYM_NO;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
ColSpMatrix temp;
grabMatrix(matrix, temp);
Base::factorize(temp);
}
protected:
void init()
{
m_structureIsUptodate = false;
m_iparm(IPARM_SYM) = API_SYM_NO;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
}
void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
{
if(IsStrSym)
out = matrix;
else
{
if(!m_structureIsUptodate)
{
// update the transposed structure
m_transposedStructure = matrix.transpose();
// Set the elements of the matrix to zero
for (Index j=0; j<m_transposedStructure.outerSize(); ++j)
for(typename ColSpMatrix::InnerIterator it(m_transposedStructure, j); it; ++it)
it.valueRef() = 0.0;
m_structureIsUptodate = true;
}
out = m_transposedStructure + matrix;
}
internal::c_to_fortran_numbering(out);
}
using Base::m_iparm;
using Base::m_dparm;
using Base::m_StrMatTrans;
using Base::m_hasTranspose;
private:
PastixLU(PastixLU& ) {}
ColSpMatrix m_transposedStructure;
bool m_structureIsUptodate;
};
/** \ingroup PaStiXSupport_Module
@@ -621,15 +579,18 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
public:
typedef _MatrixType MatrixType;
typedef PastixBase<PastixLLT<MatrixType, _UpLo> > Base;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef typename Base::ColSpMatrix ColSpMatrix;
public:
enum { UpLo = _UpLo };
PastixLLT():Base() {}
PastixLLT() : Base()
{
init();
}
PastixLLT(const MatrixType& matrix):Base()
{
init();
compute(matrix);
}
@@ -638,13 +599,8 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
*/
void compute (const MatrixType& matrix)
{
// Pastix supports only lower, column-major matrices
Base::PastixInit(); // This is necessary to let PaStiX initialize its data structure between successive calls to compute
SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
PermutationMatrix<Dynamic,Dynamic,Index> pnull;
temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
m_iparm(IPARM_SYM) = API_SYM_YES;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
ColSpMatrix temp;
grabMatrix(matrix, temp);
Base::compute(temp);
}
@@ -654,13 +610,8 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
*/
void analyzePattern(const MatrixType& matrix)
{
Base::PastixInit();
// Pastix supports only lower, column-major matrices
SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
PermutationMatrix<Dynamic,Dynamic,Index> pnull;
temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
m_iparm(IPARM_SYM) = API_SYM_YES;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
ColSpMatrix temp;
grabMatrix(matrix, temp);
Base::analyzePattern(temp);
}
/** Compute the LL^T supernodal numerical factorization of \p matrix
@@ -668,19 +619,25 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
*/
void factorize(const MatrixType& matrix)
{
// Pastix supports only lower, column-major matrices
SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
PermutationMatrix<Dynamic,Dynamic,Index> pnull;
temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
m_iparm(IPARM_SYM) = API_SYM_YES;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
ColSpMatrix temp;
grabMatrix(matrix, temp);
Base::factorize(temp);
}
protected:
using Base::m_iparm;
private:
PastixLLT(PastixLLT& ) {}
void init()
{
m_iparm(IPARM_SYM) = API_SYM_YES;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
}
void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
{
// Pastix supports only lower, column-major matrices
out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
internal::c_to_fortran_numbering(out);
}
};
/** \ingroup PaStiXSupport_Module
@@ -700,18 +657,21 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
template<typename _MatrixType, int _UpLo>
class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >
{
public:
public:
typedef _MatrixType MatrixType;
typedef PastixBase<PastixLDLT<MatrixType, _UpLo> > Base;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef typename Base::ColSpMatrix ColSpMatrix;
public:
enum { UpLo = _UpLo };
PastixLDLT():Base() {}
PastixLDLT():Base()
{
init();
}
PastixLDLT(const MatrixType& matrix):Base()
{
init();
compute(matrix);
}
@@ -720,13 +680,8 @@ public:
*/
void compute (const MatrixType& matrix)
{
Base::PastixInit();
// Pastix supports only lower, column-major matrices
SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
PermutationMatrix<Dynamic,Dynamic,Index> pnull;
temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
m_iparm(IPARM_SYM) = API_SYM_YES;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
ColSpMatrix temp;
grabMatrix(matrix, temp);
Base::compute(temp);
}
@@ -736,14 +691,8 @@ public:
*/
void analyzePattern(const MatrixType& matrix)
{
Base::PastixInit();
// Pastix supports only lower, column-major matrices
SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
PermutationMatrix<Dynamic,Dynamic,Index> pnull;
temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
m_iparm(IPARM_SYM) = API_SYM_YES;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
ColSpMatrix temp;
grabMatrix(matrix, temp);
Base::analyzePattern(temp);
}
/** Compute the LDL^T supernodal numerical factorization of \p matrix
@@ -751,21 +700,26 @@ public:
*/
void factorize(const MatrixType& matrix)
{
// Pastix supports only lower, column-major matrices
SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
PermutationMatrix<Dynamic,Dynamic,Index> pnull;
temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
m_iparm(IPARM_SYM) = API_SYM_YES;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
ColSpMatrix temp;
grabMatrix(matrix, temp);
Base::factorize(temp);
}
protected:
using Base::m_iparm;
private:
PastixLDLT(PastixLDLT& ) {}
void init()
{
m_iparm(IPARM_SYM) = API_SYM_YES;
m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
}
void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
{
// Pastix supports only lower, column-major matrices
out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
internal::c_to_fortran_numbering(out);
}
};
namespace internal {

View File

@@ -76,6 +76,9 @@ enum SimplicialCholeskyMode {
* These classes provide LL^T and LDL^T Cholesky factorizations of sparse matrices that are
* selfadjoint and positive definite. The factorization allows for solving A.X = B where
* X and B can be either dense or sparse.
*
* In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
* such that the factorized matrix is P A P^-1.
*
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
@@ -208,7 +211,7 @@ class SimplicialCholeskyBase : internal::noncopyable
return;
if(m_P.size()>0)
dest = m_Pinv * b;
dest = m_P * b;
else
dest = b;
@@ -222,7 +225,7 @@ class SimplicialCholeskyBase : internal::noncopyable
derived().matrixU().solveInPlace(dest);
if(m_P.size()>0)
dest = m_P * dest;
dest = m_Pinv * dest;
}
/** \internal */
@@ -268,7 +271,7 @@ class SimplicialCholeskyBase : internal::noncopyable
eigen_assert(a.rows()==a.cols());
int size = a.cols();
CholMatrixType ap(size,size);
ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_Pinv);
ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
factorize_preordered<DoLDLT>(ap);
}
@@ -358,6 +361,9 @@ template<typename _MatrixType, int _UpLo> struct traits<SimplicialCholesky<_Matr
* This class provides a LL^T Cholesky factorizations of sparse matrices that are
* selfadjoint and positive definite. The factorization allows for solving A.X = B where
* X and B can be either dense or sparse.
*
* In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
* such that the factorized matrix is P A P^-1.
*
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
@@ -443,6 +449,9 @@ public:
* This class provides a LDL^T Cholesky factorizations without square root of sparse matrices that are
* selfadjoint and positive definite. The factorization allows for solving A.X = B where
* X and B can be either dense or sparse.
*
* In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
* such that the factorized matrix is P A P^-1.
*
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
@@ -628,7 +637,7 @@ public:
return;
if(Base::m_P.size()>0)
dest = Base::m_Pinv * b;
dest = Base::m_P * b;
else
dest = b;
@@ -652,7 +661,7 @@ public:
}
if(Base::m_P.size()>0)
dest = Base::m_P * dest;
dest = Base::m_Pinv * dest;
}
Scalar determinant() const
@@ -678,22 +687,23 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixTy
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
// TODO allows to configure the permutation
// Note that amd compute the inverse permutation
{
CholMatrixType C;
C = a.template selfadjointView<UpLo>();
// remove diagonal entries:
// seems not to be needed
// C.prune(keep_diag());
internal::minimum_degree_ordering(C, m_P);
internal::minimum_degree_ordering(C, m_Pinv);
}
if(m_P.size()>0)
m_Pinv = m_P.inverse();
if(m_Pinv.size()>0)
m_P = m_Pinv.inverse();
else
m_Pinv.resize(0);
m_P.resize(0);
ap.resize(size,size);
ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_Pinv);
ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
}
template<typename Derived>

View File

@@ -372,7 +372,7 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
const typename SparseDenseProductReturnType<Derived,OtherDerived>::Type
operator*(const MatrixBase<OtherDerived> &other) const;
/** \returns an expression of P^-1 H P */
/** \returns an expression of P H P^-1 where H is the matrix represented by \c *this */
SparseSymmetricPermutationProduct<Derived,Upper|Lower> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
{
return SparseSymmetricPermutationProduct<Derived,Upper|Lower>(derived(), perm);

View File

@@ -125,7 +125,7 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
_dest = tmp;
}
/** \returns an expression of P^-1 H P */
/** \returns an expression of P H P^-1 */
SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
{
return SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo>(m_matrix, perm);

View File

@@ -36,7 +36,7 @@
# define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
# endif
# include <windows.h>
#elif __APPLE__
#elif defined(__APPLE__)
#include <CoreServices/CoreServices.h>
#include <mach/mach_time.h>
#else

View File

@@ -5,6 +5,7 @@ axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000
axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000
matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:3000
matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:3000
trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:3000
trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:3000
trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:3000
cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:3000

View File

@@ -38,6 +38,7 @@ source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix
# source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trmm $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trisolve_vector $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trisolve_matrix $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix

View File

@@ -195,16 +195,16 @@ public :
}
static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){
X = L.template triangularView<Lower>().solve(B);
X = L.template triangularView<Upper>().solve(B);
}
static inline void trmm(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){
X = L.template triangularView<Lower>() * B;
X.noalias() = L.template triangularView<Lower>() * B;
}
static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
C = X;
internal::llt_inplace<Lower>::blocked(C);
internal::llt_inplace<real,Lower>::blocked(C);
//C = X.llt().matrixL();
// C = X;
// Cholesky<gene_matrix>::computeInPlace(C);

View File

@@ -81,7 +81,7 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal
int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
{
// std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n";
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex);
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking<Scalar,Scalar>&);
static functype func[32];
static bool init = false;
@@ -143,11 +143,17 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
return xerbla_(SCALAR_SUFFIX_UP"TRSM ",&info,6);
int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
if(SIDE(*side)==LEFT)
func[code](*m, *n, a, *lda, b, *ldb);
{
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m);
func[code](*m, *n, a, *lda, b, *ldb, blocking);
}
else
func[code](*n, *m, a, *lda, b, *ldb);
{
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n);
func[code](*n, *m, a, *lda, b, *ldb, blocking);
}
if(alpha!=Scalar(1))
matrix(b,*m,*n,*ldb) *= alpha;

View File

@@ -1,6 +1,6 @@
namespace Eigen {
/** \page TopicVectorization Vectorizaion
/** \page TopicVectorization Vectorization
TODO: write this dox page!

View File

@@ -0,0 +1,46 @@
namespace Eigen {
/** \page TopicMultiThreading Eigen and multi-threading
\section TopicMultiThreading_MakingEigenMT Make Eigen run in parallel
Some Eigen's algorithms can exploit the multiple cores present in your hardware. To this end, it is enough to enable OpenMP on your compiler, for instance:
* GCC: \c -fopenmp
* ICC: \c -openmp
* MSVC: check the respective option in the build properties.
You can control the number of thread that will be used using either the OpenMP API or Eiegn's API using the following priority:
\code
OMP_NUM_THREADS=n ./my_program
omp_set_num_threads(n);
Eigen::setNbThreads(n);
\endcode
Unless setNbThreads has been called, Eigen uses the number of threads specified by OpenMP. You can restore this bahavior by calling \code setNbThreads(0); \endcode
You can query the number of threads that will be used with:
\code
n = Eigen::nbThreads(n);
\endcode
You can disable Eigen's multi threading at compile time by defining the EIGEN_DONT_PARALLELIZE preprocessor token.
Currently, the following algorithms can make use of multi-threading:
* general matrix - matrix products
* PartialPivLU
\section TopicMultiThreading_UsingEigenWithMT Using Eigen in a multi-threaded application
In the case your own application is multithreaded, and multiple threads make calls to Eigen, then you have to initialize Eigen by calling the following routine \b before creating the threads:
\code
#include <Eigen/Core>
int main(int argc, char** argv)
{
Eigen::initParallel();
...
}
\endcode
In the case your application is parallelized with OpenMP, you might want to disable Eigen's own parallization as detailed in the previous section.
*/
}

View File

@@ -18,13 +18,15 @@ set(LAPACK_FOUND TRUE)
set(BLAS_LIBRARIES eigen_blas)
set(LAPACK_LIBRARIES eigen_lapack)
if(TEST_REAL_CASES)
set(EIGEN_TEST_MATRIX_DIR "" CACHE STRING "Enable testing of realword sparse matrices contained in the specified path")
if(EIGEN_TEST_MATRIX_DIR)
if(NOT WIN32)
add_definitions( -DTEST_REAL_CASES="${TEST_REAL_CASES}" )
message(STATUS "Test realworld sparse matrices: ${EIGEN_TEST_MATRIX_DIR}")
add_definitions( -DTEST_REAL_CASES="${EIGEN_TEST_MATRIX_DIR}" )
else(NOT WIN32)
message(STATUS, "REAL CASES CAN NOT BE CURRENTLY TESTED ON WIN32")
message(STATUS "REAL CASES CAN NOT BE CURRENTLY TESTED ON WIN32")
endif(NOT WIN32)
endif(TEST_REAL_CASES)
endif(EIGEN_TEST_MATRIX_DIR)
set(SPARSE_LIBS " ")

View File

@@ -129,13 +129,20 @@ void ctms_decompositions()
0,
maxSize, maxSize> ComplexMatrix;
const Matrix A(Matrix::Random(size, size));
const Matrix A(Matrix::Random(size, size)), B(Matrix::Random(size, size));
Matrix X(size,size);
const ComplexMatrix complexA(ComplexMatrix::Random(size, size));
const Matrix saA = A.adjoint() * A;
const Vector b(Vector::Random(size));
Vector x(size);
// Cholesky module
Eigen::LLT<Matrix> LLT; LLT.compute(A);
X = LLT.solve(B);
x = LLT.solve(b);
Eigen::LDLT<Matrix> LDLT; LDLT.compute(A);
X = LDLT.solve(B);
x = LDLT.solve(b);
// Eigenvalues module
Eigen::HessenbergDecomposition<ComplexMatrix> hessDecomp; hessDecomp.compute(complexA);
@@ -147,12 +154,22 @@ void ctms_decompositions()
// LU module
Eigen::PartialPivLU<Matrix> ppLU; ppLU.compute(A);
X = ppLU.solve(B);
x = ppLU.solve(b);
Eigen::FullPivLU<Matrix> fpLU; fpLU.compute(A);
X = fpLU.solve(B);
x = fpLU.solve(b);
// QR module
Eigen::HouseholderQR<Matrix> hQR; hQR.compute(A);
X = hQR.solve(B);
x = hQR.solve(b);
Eigen::ColPivHouseholderQR<Matrix> cpQR; cpQR.compute(A);
X = cpQR.solve(B);
x = cpQR.solve(b);
Eigen::FullPivHouseholderQR<Matrix> fpQR; fpQR.compute(A);
X = fpQR.solve(B);
x = fpQR.solve(b);
// SVD module
Eigen::JacobiSVD<Matrix> jSVD; jSVD.compute(A, ComputeFullU | ComputeFullV);