mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
37 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd3685cc6a | ||
|
|
487a6e6515 | ||
|
|
75f0b8aae3 | ||
|
|
0164f4c682 | ||
|
|
bbff608a42 | ||
|
|
ea56d2ff2c | ||
|
|
a4c8701e9a | ||
|
|
0a08d4c60b | ||
|
|
4086187e49 | ||
|
|
c3597106ab | ||
|
|
aed1d6597f | ||
|
|
b6f04a2dd4 | ||
|
|
a9aa3bcf50 | ||
|
|
32b8da66e3 | ||
|
|
eb94179ea3 | ||
|
|
52a7386aef | ||
|
|
8cada1d894 | ||
|
|
6e4a664c42 | ||
|
|
1cd1a96d56 | ||
|
|
86ab00cdcf | ||
|
|
65f09be8d2 | ||
|
|
400d756b82 | ||
|
|
9d31798a84 | ||
|
|
0a7de0b273 | ||
|
|
a287140f72 | ||
|
|
4d89ec8a00 | ||
|
|
441760f239 | ||
|
|
664162fb8a | ||
|
|
aa3c761002 | ||
|
|
94f2cfc9c7 | ||
|
|
4a13d79df6 | ||
|
|
463176cc44 | ||
|
|
5aab97fba6 | ||
|
|
89abc6806d | ||
|
|
baf793ebaa | ||
|
|
b4ddafcfac | ||
|
|
1079967710 |
@@ -380,7 +380,7 @@ else()
|
||||
)
|
||||
endif()
|
||||
set(CMAKEPACKAGE_INSTALL_DIR
|
||||
"${CMAKE_INSTALL_LIBDIR}/cmake/eigen3"
|
||||
"${CMAKE_INSTALL_DATADIR}/eigen3/cmake"
|
||||
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
|
||||
)
|
||||
set(PKGCONFIG_INSTALL_DIR
|
||||
@@ -507,18 +507,89 @@ set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
|
||||
set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
|
||||
set ( EIGEN_DEFINITIONS "")
|
||||
set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" )
|
||||
set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
|
||||
set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
|
||||
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
@ONLY ESCAPE_QUOTES
|
||||
)
|
||||
# Interface libraries require at least CMake 3.0
|
||||
if (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
include (CMakePackageConfigHelpers)
|
||||
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
)
|
||||
# Imported target support
|
||||
add_library (eigen INTERFACE)
|
||||
|
||||
target_compile_definitions (eigen INTERFACE ${EIGEN_DEFINITIONS})
|
||||
target_include_directories (eigen INTERFACE
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
||||
$<INSTALL_INTERFACE:${INCLUDE_INSTALL_DIR}>
|
||||
)
|
||||
|
||||
# Export as title case Eigen
|
||||
set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen)
|
||||
|
||||
install (TARGETS eigen EXPORT Eigen3Targets)
|
||||
|
||||
configure_package_config_file (
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
||||
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
||||
)
|
||||
# Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does
|
||||
# not depend on architecture specific settings or libraries. More
|
||||
# specifically, an Eigen3Config.cmake generated from a 64 bit target can be
|
||||
# used for 32 bit targets as well (and vice versa).
|
||||
set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P})
|
||||
unset (CMAKE_SIZEOF_VOID_P)
|
||||
write_basic_package_version_file (Eigen3ConfigVersion.cmake
|
||||
VERSION ${EIGEN_VERSION_NUMBER} COMPATIBILITY SameMajorVersion)
|
||||
set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P})
|
||||
|
||||
# The Eigen target will be located in the Eigen3 namespace. Other CMake
|
||||
# targets can refer to it using Eigen3::Eigen.
|
||||
export (TARGETS eigen NAMESPACE Eigen3:: FILE Eigen3Targets.cmake)
|
||||
# Export Eigen3 package to CMake registry such that it can be easily found by
|
||||
# CMake even if it has not been installed to a standard directory.
|
||||
export (PACKAGE Eigen3)
|
||||
|
||||
install (EXPORT Eigen3Targets NAMESPACE Eigen3:: DESTINATION
|
||||
${CMAKEPACKAGE_INSTALL_DIR})
|
||||
install (FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR})
|
||||
else (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
# Fallback to legacy Eigen3Config.cmake without the imported target
|
||||
|
||||
# If CMakePackageConfigHelpers module is available (CMake >= 2.8.8)
|
||||
# create a relocatable Config file, otherwise leave the hardcoded paths
|
||||
include(CMakePackageConfigHelpers OPTIONAL RESULT_VARIABLE CPCH_PATH)
|
||||
|
||||
if(CPCH_PATH)
|
||||
configure_package_config_file (
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
||||
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
||||
)
|
||||
else()
|
||||
# The PACKAGE_* variables are defined by the configure_package_config_file
|
||||
# but without it we define them manually to the hardcoded paths
|
||||
set(PACKAGE_INIT "")
|
||||
set(PACKAGE_EIGEN_INCLUDE_DIR ${EIGEN_INCLUDE_DIR})
|
||||
set(PACKAGE_EIGEN_ROOT_DIR ${EIGEN_ROOT_DIR})
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
@ONLY ESCAPE_QUOTES
|
||||
)
|
||||
endif()
|
||||
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
)
|
||||
endif (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
|
||||
# Add uninstall target
|
||||
add_custom_target ( uninstall
|
||||
|
||||
@@ -4,14 +4,10 @@
|
||||
## # The following are required to uses Dart and the Cdash dashboard
|
||||
## ENABLE_TESTING()
|
||||
## INCLUDE(CTest)
|
||||
set(CTEST_PROJECT_NAME "Eigen")
|
||||
set(CTEST_PROJECT_NAME "Eigen3.3")
|
||||
set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
|
||||
|
||||
set(CTEST_DROP_METHOD "http")
|
||||
set(CTEST_DROP_SITE "manao.inria.fr")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.3")
|
||||
set(CTEST_DROP_SITE_CDASH TRUE)
|
||||
set(CTEST_PROJECT_SUBPROJECTS
|
||||
Official
|
||||
Unsupported
|
||||
)
|
||||
|
||||
@@ -407,7 +407,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
|
||||
: int(Kernel::AssignmentTraits::DstAlignment),
|
||||
srcAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
|
||||
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||
|
||||
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
|
||||
@@ -527,7 +527,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
dstAlignment = alignable ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment)
|
||||
};
|
||||
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
|
||||
const Scalar *dst_ptr = kernel.dstDataPtr();
|
||||
if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
|
||||
{
|
||||
// the pointer is not aligend-on scalar, so alignment is not possible
|
||||
@@ -683,6 +683,11 @@ public:
|
||||
: int(DstEvaluatorType::Flags)&RowMajorBit ? inner
|
||||
: outer;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
|
||||
{
|
||||
return m_dstExpr.data();
|
||||
}
|
||||
|
||||
protected:
|
||||
DstEvaluatorType& m_dst;
|
||||
@@ -876,6 +881,34 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.evalTo(dst);
|
||||
}
|
||||
|
||||
// NOTE The following two functions are templated to avoid their instanciation if not needed
|
||||
// This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
|
||||
template<typename SrcScalarType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.addTo(dst);
|
||||
}
|
||||
|
||||
template<typename SrcScalarType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.subTo(dst);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
@@ -84,6 +84,7 @@ class CwiseBinaryOp :
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::remove_all<BinaryOp>::type Functor;
|
||||
typedef typename internal::remove_all<LhsType>::type Lhs;
|
||||
typedef typename internal::remove_all<RhsType>::type Rhs;
|
||||
|
||||
|
||||
@@ -70,9 +70,11 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
|
||||
#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
|
||||
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
|
||||
|
||||
#endif
|
||||
|
||||
eigen_assert(size() == other.size());
|
||||
|
||||
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
|
||||
|
||||
@@ -25,7 +25,8 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
|
||||
template<int Size, int MaxSize> struct product_size_category
|
||||
{
|
||||
enum { is_large = MaxSize == Dynamic ||
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
|
||||
(Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
|
||||
value = is_large ? Large
|
||||
: Size == 1 ? 1
|
||||
: Small
|
||||
@@ -264,7 +265,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
||||
if (!evalToDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
}
|
||||
@@ -329,6 +330,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
// TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
|
||||
typename nested_eval<Rhs,1>::type actual_rhs(rhs);
|
||||
const Index size = rhs.rows();
|
||||
@@ -342,6 +344,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
|
||||
const Index rows = dest.rows();
|
||||
for(Index i=0; i<rows; ++i)
|
||||
|
||||
@@ -763,6 +763,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
{
|
||||
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
|
||||
const bool is_integer = NumTraits<T>::IsInteger;
|
||||
EIGEN_UNUSED_VARIABLE(is_integer);
|
||||
EIGEN_STATIC_ASSERT(is_integer,
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
resize(size);
|
||||
|
||||
@@ -366,17 +366,22 @@ template<typename Lhs, typename Rhs>
|
||||
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
||||
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
|
||||
{
|
||||
typedef typename nested_eval<Lhs,1>::type LhsNested;
|
||||
typedef typename nested_eval<Rhs,1>::type RhsNested;
|
||||
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
||||
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
|
||||
typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
|
||||
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
|
||||
|
||||
template<typename Dest>
|
||||
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||
{
|
||||
LhsNested actual_lhs(lhs);
|
||||
RhsNested actual_rhs(rhs);
|
||||
|
||||
internal::gemv_dense_selector<Side,
|
||||
(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
|
||||
>::run(lhs, rhs, dst, alpha);
|
||||
>::run(actual_lhs, actual_rhs, dst, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -24,13 +25,48 @@ struct Packet1cd
|
||||
Packet2d v;
|
||||
};
|
||||
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||
union {
|
||||
Packet4f v;
|
||||
Packet1cd cd[2];
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasBlend = 1,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
AlignedOnScalar = 1,
|
||||
size = 1,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
@@ -47,20 +83,68 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
/* Forward declaration */
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
||||
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
|
||||
res.cd[1] = res.cd[0];
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return pload<Packet2cf>(af);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
|
||||
{
|
||||
return pload<Packet1cd>(from);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
pstore<std::complex<float> >((std::complex<float> *) af, from);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
|
||||
{
|
||||
pstore<std::complex<double> >(to, from);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
|
||||
res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
@@ -79,43 +163,90 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
|
||||
|
||||
return Packet1cd(v1 + v2);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return pset1<Packet1cd>(*from);
|
||||
Packet2cf res;
|
||||
res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
|
||||
res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
std::complex<double> EIGEN_ALIGN16 res[2];
|
||||
pstore<std::complex<double> >(res, a);
|
||||
std::complex<double> EIGEN_ALIGN16 res;
|
||||
pstore<std::complex<double> >(&res, a);
|
||||
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 res[2];
|
||||
pstore<std::complex<float> >(res, a);
|
||||
|
||||
return res[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = a.cd[1];
|
||||
res.cd[1] = a.cd[0];
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> res;
|
||||
Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
|
||||
vec_st2f(b.v, (float*)&res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
|
||||
{
|
||||
return vecs[0];
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
|
||||
{
|
||||
PacketBlock<Packet2cf,2> transpose;
|
||||
transpose.packet[0] = vecs[0];
|
||||
transpose.packet[1] = vecs[1];
|
||||
ptranspose(transpose);
|
||||
|
||||
return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> res;
|
||||
Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
|
||||
vec_st2f(b.v, (float*)&res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet1cd>
|
||||
@@ -127,6 +258,18 @@ struct palign_impl<Offset,Packet1cd>
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset == 1) {
|
||||
first.cd[0] = first.cd[1];
|
||||
first.cd[1] = second.cd[0];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
@@ -160,6 +303,39 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
@@ -168,17 +344,49 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
|
||||
return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
Packet2cf res;
|
||||
res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
|
||||
res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
{
|
||||
return Packet1cd(preverse(Packet2d(x.v)));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = pcplxflip(x.cd[0]);
|
||||
res.cd[1] = pcplxflip(x.cd[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
||||
{
|
||||
Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
|
||||
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
||||
{
|
||||
Packet1cd tmp = kernel.packet[0].cd[1];
|
||||
kernel.packet[0].cd[1] = kernel.packet[1].cd[0];
|
||||
kernel.packet[1].cd[0] = tmp;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
||||
Packet2cf result;
|
||||
const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] };
|
||||
result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
|
||||
return result;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
//
|
||||
// Copyright (C) 2007 Julien Pommier
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -19,32 +20,32 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
Packet2d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
Packet2d tmp, fx;
|
||||
Packet2l emm0;
|
||||
|
||||
@@ -91,18 +92,44 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
isnumber_mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f pexp<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pexp<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = pexp<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d psqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return __builtin_s390_vfsqdb(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x) {
|
||||
// Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation.
|
||||
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x) {
|
||||
Packet4f res;
|
||||
res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -28,9 +28,8 @@ namespace internal {
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
|
||||
#endif
|
||||
|
||||
typedef __vector int Packet4i;
|
||||
@@ -42,6 +41,10 @@ typedef __vector double Packet2d;
|
||||
typedef __vector unsigned long long Packet2ul;
|
||||
typedef __vector long long Packet2l;
|
||||
|
||||
typedef struct {
|
||||
Packet2d v4f[2];
|
||||
} Packet4f;
|
||||
|
||||
typedef union {
|
||||
int32_t i[4];
|
||||
uint32_t ui[4];
|
||||
@@ -88,6 +91,7 @@ static Packet2d p2d_ONE = { 1.0, 1.0 };
|
||||
static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
|
||||
|
||||
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
|
||||
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
|
||||
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
|
||||
|
||||
static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||
@@ -132,13 +136,11 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
// FIXME check the Has*
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
@@ -147,6 +149,37 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasAbs = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
HasNegate = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet2d type;
|
||||
@@ -157,7 +190,6 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
size=2,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
// FIXME check the Has*
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
@@ -180,8 +212,12 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
|
||||
|
||||
/* Forward declaration */
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
|
||||
{
|
||||
Packet vt;
|
||||
@@ -222,6 +258,32 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Helper function to simulate a vec_splat_packet4f
|
||||
*/
|
||||
template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from)
|
||||
{
|
||||
Packet4f splat;
|
||||
switch (element) {
|
||||
case 0:
|
||||
splat.v4f[0] = vec_splat(from.v4f[0], 0);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 1:
|
||||
splat.v4f[0] = vec_splat(from.v4f[0], 1);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 2:
|
||||
splat.v4f[0] = vec_splat(from.v4f[1], 0);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 3:
|
||||
splat.v4f[0] = vec_splat(from.v4f[1], 1);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
}
|
||||
return splat;
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4i>
|
||||
{
|
||||
@@ -238,6 +300,31 @@ struct palign_impl<Offset,Packet4i>
|
||||
}
|
||||
};
|
||||
|
||||
/* This is a tricky one, we have to translate float alignment to vector elements of sizeof double
|
||||
*/
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
switch (Offset % 4) {
|
||||
case 1:
|
||||
first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
|
||||
first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
|
||||
break;
|
||||
case 2:
|
||||
first.v4f[0] = first.v4f[1];
|
||||
first.v4f[1] = second.v4f[0];
|
||||
break;
|
||||
case 3:
|
||||
first.v4f[0] = vec_sld(first.v4f[1], second.v4f[0], 8);
|
||||
first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2d>
|
||||
{
|
||||
@@ -257,6 +344,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
|
||||
return vfrom->v4i;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
Packet4f vfrom;
|
||||
vfrom.v4f[0] = vec_ld2f(&from[0]);
|
||||
vfrom.v4f[1] = vec_ld2f(&from[2]);
|
||||
return vfrom;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
@@ -275,6 +372,15 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
|
||||
vto->v4i = from;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
vec_st2f(from.v4f[0], &to[0]);
|
||||
vec_st2f(from.v4f[1], &to[2]);
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
@@ -288,10 +394,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
|
||||
{
|
||||
return vec_splats(from);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
|
||||
return vec_splats(from);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
|
||||
{
|
||||
Packet4f to;
|
||||
to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
|
||||
to.v4f[1] = to.v4f[0];
|
||||
return to;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4i>(const int *a,
|
||||
@@ -304,6 +416,17 @@ pbroadcast4<Packet4i>(const int *a,
|
||||
a3 = vec_splat(a3, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||
{
|
||||
a3 = pload<Packet4f>(a);
|
||||
a0 = vec_splat_packet4f<0>(a3);
|
||||
a1 = vec_splat_packet4f<1>(a3);
|
||||
a2 = vec_splat_packet4f<2>(a3);
|
||||
a3 = vec_splat_packet4f<3>(a3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet2d>(const double *a,
|
||||
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
|
||||
@@ -326,6 +449,16 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f
|
||||
return pload<Packet4i>(ai);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
|
||||
{
|
||||
float EIGEN_ALIGN16 ai[4];
|
||||
ai[0] = from[0*stride];
|
||||
ai[1] = from[1*stride];
|
||||
ai[2] = from[2*stride];
|
||||
ai[3] = from[3*stride];
|
||||
return pload<Packet4f>(ai);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
|
||||
{
|
||||
double EIGEN_ALIGN16 af[2];
|
||||
@@ -344,6 +477,16 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
|
||||
{
|
||||
float EIGEN_ALIGN16 ai[4];
|
||||
pstore<float>((float *)ai, from);
|
||||
to[0*stride] = ai[0];
|
||||
to[1*stride] = ai[1];
|
||||
to[2*stride] = ai[2];
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
|
||||
{
|
||||
double EIGEN_ALIGN16 af[2];
|
||||
@@ -353,52 +496,160 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to,
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] + b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] + b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] - b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] - b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] * b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] * b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] / b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] / b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = -a.v4f[0];
|
||||
c.v4f[1] = -a.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
|
||||
res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_round(a.v4f[0]);
|
||||
res.v4f[1] = vec_round(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_ceil(a.v4f[0]);
|
||||
res.v4f[1] = vec_ceil(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_floor(a.v4f[0]);
|
||||
res.v4f[1] = vec_floor(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { return pload<Packet4f>(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
|
||||
|
||||
|
||||
@@ -408,6 +659,14 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
Packet4f p = pload<Packet4f>(from);
|
||||
p.v4f[1] = vec_splat(p.v4f[0], 1);
|
||||
p.v4f[0] = vec_splat(p.v4f[0], 0);
|
||||
return p;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
{
|
||||
Packet2d p = pload<Packet2d>(from);
|
||||
@@ -415,12 +674,15 @@ template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
|
||||
@@ -433,8 +695,23 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
|
||||
return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
|
||||
{
|
||||
Packet4f rev;
|
||||
rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
|
||||
rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
|
||||
return rev;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pabs(a.v4f[0]);
|
||||
res.v4f[1] = pabs(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -453,6 +730,13 @@ template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||
sum = padd<Packet2d>(a, b);
|
||||
return pfirst(sum);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d sum;
|
||||
sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
double first = predux<Packet2d>(sum);
|
||||
return static_cast<float>(first);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
{
|
||||
@@ -493,6 +777,21 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
PacketBlock<Packet4f,4> transpose;
|
||||
transpose.packet[0] = vecs[0];
|
||||
transpose.packet[1] = vecs[1];
|
||||
transpose.packet[2] = vecs[2];
|
||||
transpose.packet[3] = vecs[3];
|
||||
ptranspose(transpose);
|
||||
|
||||
Packet4f sum = padd(transpose.packet[0], transpose.packet[1]);
|
||||
sum = padd(sum, transpose.packet[2]);
|
||||
sum = padd(sum, transpose.packet[3]);
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Other reduction functions:
|
||||
// mul
|
||||
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
@@ -507,6 +806,12 @@ template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
|
||||
return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
// Return predux_mul<Packet2d> of the subvectors product
|
||||
return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
|
||||
}
|
||||
|
||||
// min
|
||||
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -521,6 +826,14 @@ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
|
||||
return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d b, res;
|
||||
b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
|
||||
return static_cast<float>(pfirst(res));
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -536,6 +849,14 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
|
||||
return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d b, res;
|
||||
b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
|
||||
return static_cast<float>(pfirst(res));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
|
||||
@@ -556,12 +877,61 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
|
||||
kernel.packet[1] = t1;
|
||||
}
|
||||
|
||||
/* Split the Packet4f PacketBlock into 4 Packet2d PacketBlocks and transpose each one
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
PacketBlock<Packet2d,2> t0,t1,t2,t3;
|
||||
// copy top-left 2x2 Packet2d block
|
||||
t0.packet[0] = kernel.packet[0].v4f[0];
|
||||
t0.packet[1] = kernel.packet[1].v4f[0];
|
||||
|
||||
// copy top-right 2x2 Packet2d block
|
||||
t1.packet[0] = kernel.packet[0].v4f[1];
|
||||
t1.packet[1] = kernel.packet[1].v4f[1];
|
||||
|
||||
// copy bottom-left 2x2 Packet2d block
|
||||
t2.packet[0] = kernel.packet[2].v4f[0];
|
||||
t2.packet[1] = kernel.packet[3].v4f[0];
|
||||
|
||||
// copy bottom-right 2x2 Packet2d block
|
||||
t3.packet[0] = kernel.packet[2].v4f[1];
|
||||
t3.packet[1] = kernel.packet[3].v4f[1];
|
||||
|
||||
// Transpose all 2x2 blocks
|
||||
ptranspose(t0);
|
||||
ptranspose(t1);
|
||||
ptranspose(t2);
|
||||
ptranspose(t3);
|
||||
|
||||
// Copy back transposed blocks, but exchange t1 and t2 due to transposition
|
||||
kernel.packet[0].v4f[0] = t0.packet[0];
|
||||
kernel.packet[0].v4f[1] = t2.packet[0];
|
||||
kernel.packet[1].v4f[0] = t0.packet[1];
|
||||
kernel.packet[1].v4f[1] = t2.packet[1];
|
||||
kernel.packet[2].v4f[0] = t1.packet[0];
|
||||
kernel.packet[2].v4f[1] = t3.packet[0];
|
||||
kernel.packet[3].v4f[0] = t1.packet[1];
|
||||
kernel.packet[3].v4f[1] = t3.packet[1];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
|
||||
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
|
||||
Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
|
||||
return vec_sel(elsePacket, thenPacket, mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
|
||||
Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
|
||||
Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
|
||||
Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
Packet4f result;
|
||||
result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);
|
||||
result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
|
||||
Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
|
||||
Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
|
||||
@@ -45,7 +45,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
|
||||
m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
|
||||
m_interPacket(plset<Packet>(0)),
|
||||
m_flip(std::abs(high)<std::abs(low))
|
||||
m_flip(numext::abs(high)<numext::abs(low))
|
||||
{}
|
||||
|
||||
template<typename IndexType>
|
||||
@@ -94,7 +94,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/true>
|
||||
m_low(low),
|
||||
m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
|
||||
m_divisor(convert_index<Scalar>(num_steps+high-low)/(high-low+1)),
|
||||
m_use_divisor((high-low+1)<num_steps)
|
||||
m_use_divisor((high+1)<(low+num_steps))
|
||||
{}
|
||||
|
||||
template<typename IndexType>
|
||||
@@ -173,6 +173,13 @@ template<typename Scalar, typename PacketType,typename IndexType>
|
||||
struct has_unary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 1}; };
|
||||
template<typename Scalar, typename PacketType,typename IndexType>
|
||||
struct has_binary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
|
||||
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_nullary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 1}; };
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_unary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_binary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -972,7 +972,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
|
||||
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
|
||||
internal::prefetch(blA+(3*K+16)*LhsProgress); \
|
||||
if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
|
||||
if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } /* Bug 953 */ \
|
||||
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
|
||||
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
|
||||
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
#define EIGEN_WORLD_VERSION 3
|
||||
#define EIGEN_MAJOR_VERSION 3
|
||||
#define EIGEN_MINOR_VERSION 0
|
||||
#define EIGEN_MINOR_VERSION 1
|
||||
|
||||
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
|
||||
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
|
||||
|
||||
@@ -523,7 +523,7 @@ template<typename T> struct smart_memmove_helper<T,true> {
|
||||
template<typename T> struct smart_memmove_helper<T,false> {
|
||||
static inline void run(const T* start, const T* end, T* target)
|
||||
{
|
||||
if (uintptr_t(target) < uintptr_t(start))
|
||||
if (UIntPtr(target) < UIntPtr(start))
|
||||
{
|
||||
std::copy(start, end, target);
|
||||
}
|
||||
|
||||
@@ -381,12 +381,12 @@ struct has_ReturnType
|
||||
enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) };
|
||||
};
|
||||
|
||||
template<typename T> const T& return_ref();
|
||||
template<typename T> const T* return_ptr();
|
||||
|
||||
template <typename T, typename IndexType=Index>
|
||||
struct has_nullary_operator
|
||||
{
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()())>0)>::type * = 0);
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()())>0)>::type * = 0);
|
||||
static meta_no testFunctor(...);
|
||||
|
||||
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
|
||||
@@ -395,7 +395,7 @@ struct has_nullary_operator
|
||||
template <typename T, typename IndexType=Index>
|
||||
struct has_unary_operator
|
||||
{
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0)))>0)>::type * = 0);
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0)))>0)>::type * = 0);
|
||||
static meta_no testFunctor(...);
|
||||
|
||||
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
|
||||
@@ -404,7 +404,7 @@ struct has_unary_operator
|
||||
template <typename T, typename IndexType=Index>
|
||||
struct has_binary_operator
|
||||
{
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
|
||||
static meta_no testFunctor(...);
|
||||
|
||||
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
|
||||
|
||||
@@ -445,15 +445,11 @@ template<typename T, int n, typename PlainObject = typename plain_object_eval<T>
|
||||
// Another solution could be to count the number of temps?
|
||||
NAsInteger = n == Dynamic ? HugeCost : n,
|
||||
CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
|
||||
CostNoEval = NAsInteger * CoeffReadCost
|
||||
CostNoEval = NAsInteger * CoeffReadCost,
|
||||
Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
|
||||
};
|
||||
|
||||
typedef typename conditional<
|
||||
( (int(evaluator<T>::Flags) & EvalBeforeNestingBit) ||
|
||||
(int(CostEval) < int(CostNoEval)) ),
|
||||
PlainObject,
|
||||
typename ref_selector<T>::type
|
||||
>::type type;
|
||||
typedef typename conditional<Evaluate, PlainObject, typename ref_selector<T>::type>::type type;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
|
||||
@@ -506,7 +506,7 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
|
||||
m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k);
|
||||
}
|
||||
|
||||
RealScalar threshold_helper = numext::abs2(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows);
|
||||
RealScalar threshold_helper = numext::abs2<Scalar>(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows);
|
||||
RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon());
|
||||
|
||||
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
|
||||
@@ -557,8 +557,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
|
||||
RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j);
|
||||
temp = (RealScalar(1) + temp) * (RealScalar(1) - temp);
|
||||
temp = temp < 0 ? 0 : temp;
|
||||
RealScalar temp2 = temp * numext::abs2(m_colNormsUpdated.coeffRef(j) /
|
||||
m_colNormsDirect.coeffRef(j));
|
||||
RealScalar temp2 = temp * numext::abs2<Scalar>(m_colNormsUpdated.coeffRef(j) /
|
||||
m_colNormsDirect.coeffRef(j));
|
||||
if (temp2 <= norm_downdate_threshold) {
|
||||
// The updated norm has become too inaccurate so re-compute the column
|
||||
// norm directly.
|
||||
|
||||
@@ -412,7 +412,7 @@ struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
|
||||
}
|
||||
|
||||
// update largest diagonal entry
|
||||
maxDiagEntry = numext::maxi(maxDiagEntry,numext::maxi(abs(work_matrix.coeff(p,p)), abs(work_matrix.coeff(q,q))));
|
||||
maxDiagEntry = numext::maxi<RealScalar>(maxDiagEntry,numext::maxi<RealScalar>(abs(work_matrix.coeff(p,p)), abs(work_matrix.coeff(q,q))));
|
||||
// and check whether the 2x2 block is already diagonal
|
||||
RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
|
||||
return abs(work_matrix.coeff(p,q))>threshold || abs(work_matrix.coeff(q,p)) > threshold;
|
||||
@@ -725,7 +725,7 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
|
||||
if(computeV()) m_matrixV.applyOnTheRight(p,q,j_right);
|
||||
|
||||
// keep track of the largest diagonal coefficient
|
||||
maxDiagEntry = numext::maxi<RealScalar>(maxDiagEntry,numext::maxi(abs(m_workMatrix.coeff(p,p)), abs(m_workMatrix.coeff(q,q))));
|
||||
maxDiagEntry = numext::maxi<RealScalar>(maxDiagEntry,numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p,p)), abs(m_workMatrix.coeff(q,q))));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -130,7 +130,7 @@ public:
|
||||
|
||||
// 2 - let's check whether there is enough allocated memory
|
||||
Index nnz = tmp.nonZeros();
|
||||
Index start = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
|
||||
Index start = m_outerStart==0 ? 0 : m_matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
|
||||
Index end = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]; // ending position of the current block
|
||||
Index block_size = end - start; // available room in the current block
|
||||
Index tail_size = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end;
|
||||
@@ -139,6 +139,8 @@ public:
|
||||
? Index(matrix.data().allocatedSize()) + block_size
|
||||
: block_size;
|
||||
|
||||
Index tmp_start = tmp.outerIndexPtr()[0];
|
||||
|
||||
bool update_trailing_pointers = false;
|
||||
if(nnz>free_size)
|
||||
{
|
||||
@@ -148,8 +150,8 @@ public:
|
||||
internal::smart_copy(m_matrix.valuePtr(), m_matrix.valuePtr() + start, newdata.valuePtr());
|
||||
internal::smart_copy(m_matrix.innerIndexPtr(), m_matrix.innerIndexPtr() + start, newdata.indexPtr());
|
||||
|
||||
internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, newdata.valuePtr() + start);
|
||||
internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, newdata.indexPtr() + start);
|
||||
internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, newdata.valuePtr() + start);
|
||||
internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, newdata.indexPtr() + start);
|
||||
|
||||
internal::smart_copy(matrix.valuePtr()+end, matrix.valuePtr()+end + tail_size, newdata.valuePtr()+start+nnz);
|
||||
internal::smart_copy(matrix.innerIndexPtr()+end, matrix.innerIndexPtr()+end + tail_size, newdata.indexPtr()+start+nnz);
|
||||
@@ -173,8 +175,8 @@ public:
|
||||
update_trailing_pointers = true;
|
||||
}
|
||||
|
||||
internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, matrix.valuePtr() + start);
|
||||
internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, matrix.innerIndexPtr() + start);
|
||||
internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, matrix.valuePtr() + start);
|
||||
internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, matrix.innerIndexPtr() + start);
|
||||
}
|
||||
|
||||
// update outer index pointers and innerNonZeros
|
||||
@@ -430,7 +432,6 @@ public:
|
||||
|
||||
protected:
|
||||
// friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
|
||||
friend class ReverseInnerIterator;
|
||||
friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
|
||||
|
||||
Index nonZeros() const { return Dynamic; }
|
||||
@@ -466,8 +467,6 @@ struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBa
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
|
||||
class ReverseInnerIterator;
|
||||
|
||||
enum {
|
||||
IsRowMajor = XprType::IsRowMajor,
|
||||
|
||||
|
||||
@@ -224,11 +224,11 @@ class SparseCompressedBase<Derived>::ReverseInnerIterator
|
||||
}
|
||||
else
|
||||
{
|
||||
m_start.value() = mat.outerIndexPtr()[outer];
|
||||
m_start = mat.outerIndexPtr()[outer];
|
||||
if(mat.isCompressed())
|
||||
m_id = mat.outerIndexPtr()[outer+1];
|
||||
else
|
||||
m_id = m_start.value() + mat.innerNonZeroPtr()[outer];
|
||||
m_id = m_start + mat.innerNonZeroPtr()[outer];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -254,14 +254,15 @@ class SparseCompressedBase<Derived>::ReverseInnerIterator
|
||||
inline Index row() const { return IsRowMajor ? m_outer.value() : index(); }
|
||||
inline Index col() const { return IsRowMajor ? index() : m_outer.value(); }
|
||||
|
||||
inline operator bool() const { return (m_id > m_start.value()); }
|
||||
inline operator bool() const { return (m_id > m_start); }
|
||||
|
||||
protected:
|
||||
const Scalar* m_values;
|
||||
const StorageIndex* m_indices;
|
||||
const internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> m_outer;
|
||||
typedef internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> OuterType;
|
||||
const OuterType m_outer;
|
||||
Index m_start;
|
||||
Index m_id;
|
||||
const internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> m_start;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
@@ -272,7 +273,6 @@ struct evaluator<SparseCompressedBase<Derived> >
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename Derived::InnerIterator InnerIterator;
|
||||
typedef typename Derived::ReverseInnerIterator ReverseInnerIterator;
|
||||
|
||||
enum {
|
||||
CoeffReadCost = NumTraits<Scalar>::ReadCost,
|
||||
|
||||
@@ -68,7 +68,6 @@ protected:
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
public:
|
||||
|
||||
class ReverseInnerIterator;
|
||||
class InnerIterator
|
||||
{
|
||||
public:
|
||||
@@ -161,7 +160,6 @@ protected:
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
public:
|
||||
|
||||
class ReverseInnerIterator;
|
||||
class InnerIterator
|
||||
{
|
||||
enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
|
||||
@@ -249,7 +247,6 @@ protected:
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
public:
|
||||
|
||||
class ReverseInnerIterator;
|
||||
class InnerIterator
|
||||
{
|
||||
enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
|
||||
@@ -325,26 +322,88 @@ protected:
|
||||
const XprType &m_expr;
|
||||
};
|
||||
|
||||
template<typename T,
|
||||
typename LhsKind = typename evaluator_traits<typename T::Lhs>::Kind,
|
||||
typename RhsKind = typename evaluator_traits<typename T::Rhs>::Kind,
|
||||
typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
|
||||
typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct sparse_conjunction_evaluator;
|
||||
|
||||
// "sparse .* sparse"
|
||||
template<typename T1, typename T2, typename Lhs, typename Rhs>
|
||||
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IteratorBased, IteratorBased>
|
||||
: evaluator_base<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
|
||||
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
|
||||
{
|
||||
typedef CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> XprType;
|
||||
typedef sparse_conjunction_evaluator<XprType> Base;
|
||||
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
// "dense .* sparse"
|
||||
template<typename T1, typename T2, typename Lhs, typename Rhs>
|
||||
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IndexBased, IteratorBased>
|
||||
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
|
||||
{
|
||||
typedef CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> XprType;
|
||||
typedef sparse_conjunction_evaluator<XprType> Base;
|
||||
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
// "sparse .* dense"
|
||||
template<typename T1, typename T2, typename Lhs, typename Rhs>
|
||||
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IteratorBased, IndexBased>
|
||||
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
|
||||
{
|
||||
typedef CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> XprType;
|
||||
typedef sparse_conjunction_evaluator<XprType> Base;
|
||||
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
|
||||
// "sparse && sparse"
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct binary_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs>, IteratorBased, IteratorBased>
|
||||
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> >
|
||||
{
|
||||
typedef CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> XprType;
|
||||
typedef sparse_conjunction_evaluator<XprType> Base;
|
||||
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
// "dense && sparse"
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct binary_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs>, IndexBased, IteratorBased>
|
||||
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> >
|
||||
{
|
||||
typedef CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> XprType;
|
||||
typedef sparse_conjunction_evaluator<XprType> Base;
|
||||
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
// "sparse && dense"
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct binary_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs>, IteratorBased, IndexBased>
|
||||
: sparse_conjunction_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> >
|
||||
{
|
||||
typedef CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> XprType;
|
||||
typedef sparse_conjunction_evaluator<XprType> Base;
|
||||
explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
|
||||
// "sparse ^ sparse"
|
||||
template<typename XprType>
|
||||
struct sparse_conjunction_evaluator<XprType, IteratorBased, IteratorBased>
|
||||
: evaluator_base<XprType>
|
||||
{
|
||||
protected:
|
||||
typedef scalar_product_op<T1,T2> BinaryOp;
|
||||
typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
|
||||
typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
|
||||
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
|
||||
typedef typename XprType::Functor BinaryOp;
|
||||
typedef typename XprType::Lhs LhsArg;
|
||||
typedef typename XprType::Rhs RhsArg;
|
||||
typedef typename evaluator<LhsArg>::InnerIterator LhsIterator;
|
||||
typedef typename evaluator<RhsArg>::InnerIterator RhsIterator;
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
typedef typename traits<XprType>::Scalar Scalar;
|
||||
public:
|
||||
|
||||
class ReverseInnerIterator;
|
||||
class InnerIterator
|
||||
{
|
||||
public:
|
||||
|
||||
EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
|
||||
EIGEN_STRONG_INLINE InnerIterator(const sparse_conjunction_evaluator& aEval, Index outer)
|
||||
: m_lhsIter(aEval.m_lhsImpl,outer), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor)
|
||||
{
|
||||
while (m_lhsIter && m_rhsIter && (m_lhsIter.index() != m_rhsIter.index()))
|
||||
@@ -386,11 +445,11 @@ public:
|
||||
|
||||
|
||||
enum {
|
||||
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
|
||||
CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
|
||||
Flags = XprType::Flags
|
||||
};
|
||||
|
||||
explicit binary_evaluator(const XprType& xpr)
|
||||
explicit sparse_conjunction_evaluator(const XprType& xpr)
|
||||
: m_functor(xpr.functor()),
|
||||
m_lhsImpl(xpr.lhs()),
|
||||
m_rhsImpl(xpr.rhs())
|
||||
@@ -405,32 +464,32 @@ public:
|
||||
|
||||
protected:
|
||||
const BinaryOp m_functor;
|
||||
evaluator<Lhs> m_lhsImpl;
|
||||
evaluator<Rhs> m_rhsImpl;
|
||||
evaluator<LhsArg> m_lhsImpl;
|
||||
evaluator<RhsArg> m_rhsImpl;
|
||||
};
|
||||
|
||||
// "dense .* sparse"
|
||||
template<typename T1, typename T2, typename Lhs, typename Rhs>
|
||||
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IndexBased, IteratorBased>
|
||||
: evaluator_base<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
|
||||
// "dense ^ sparse"
|
||||
template<typename XprType>
|
||||
struct sparse_conjunction_evaluator<XprType, IndexBased, IteratorBased>
|
||||
: evaluator_base<XprType>
|
||||
{
|
||||
protected:
|
||||
typedef scalar_product_op<T1,T2> BinaryOp;
|
||||
typedef evaluator<Lhs> LhsEvaluator;
|
||||
typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
|
||||
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
|
||||
typedef typename XprType::Functor BinaryOp;
|
||||
typedef typename XprType::Lhs LhsArg;
|
||||
typedef typename XprType::Rhs RhsArg;
|
||||
typedef evaluator<LhsArg> LhsEvaluator;
|
||||
typedef typename evaluator<RhsArg>::InnerIterator RhsIterator;
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
typedef typename traits<XprType>::Scalar Scalar;
|
||||
public:
|
||||
|
||||
class ReverseInnerIterator;
|
||||
class InnerIterator
|
||||
{
|
||||
enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
|
||||
enum { IsRowMajor = (int(RhsArg::Flags)&RowMajorBit)==RowMajorBit };
|
||||
|
||||
public:
|
||||
|
||||
EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
|
||||
EIGEN_STRONG_INLINE InnerIterator(const sparse_conjunction_evaluator& aEval, Index outer)
|
||||
: m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_outer(outer)
|
||||
{}
|
||||
|
||||
@@ -458,12 +517,12 @@ public:
|
||||
|
||||
|
||||
enum {
|
||||
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
|
||||
CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
|
||||
// Expose storage order of the sparse expression
|
||||
Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit)
|
||||
Flags = (XprType::Flags & ~RowMajorBit) | (int(RhsArg::Flags)&RowMajorBit)
|
||||
};
|
||||
|
||||
explicit binary_evaluator(const XprType& xpr)
|
||||
explicit sparse_conjunction_evaluator(const XprType& xpr)
|
||||
: m_functor(xpr.functor()),
|
||||
m_lhsImpl(xpr.lhs()),
|
||||
m_rhsImpl(xpr.rhs())
|
||||
@@ -478,32 +537,32 @@ public:
|
||||
|
||||
protected:
|
||||
const BinaryOp m_functor;
|
||||
evaluator<Lhs> m_lhsImpl;
|
||||
evaluator<Rhs> m_rhsImpl;
|
||||
evaluator<LhsArg> m_lhsImpl;
|
||||
evaluator<RhsArg> m_rhsImpl;
|
||||
};
|
||||
|
||||
// "sparse .* dense"
|
||||
template<typename T1, typename T2, typename Lhs, typename Rhs>
|
||||
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IteratorBased, IndexBased>
|
||||
: evaluator_base<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
|
||||
// "sparse ^ dense"
|
||||
template<typename XprType>
|
||||
struct sparse_conjunction_evaluator<XprType, IteratorBased, IndexBased>
|
||||
: evaluator_base<XprType>
|
||||
{
|
||||
protected:
|
||||
typedef scalar_product_op<T1,T2> BinaryOp;
|
||||
typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
|
||||
typedef evaluator<Rhs> RhsEvaluator;
|
||||
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
|
||||
typedef typename XprType::Functor BinaryOp;
|
||||
typedef typename XprType::Lhs LhsArg;
|
||||
typedef typename XprType::Rhs RhsArg;
|
||||
typedef typename evaluator<LhsArg>::InnerIterator LhsIterator;
|
||||
typedef evaluator<RhsArg> RhsEvaluator;
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
typedef typename traits<XprType>::Scalar Scalar;
|
||||
public:
|
||||
|
||||
class ReverseInnerIterator;
|
||||
class InnerIterator
|
||||
{
|
||||
enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
|
||||
enum { IsRowMajor = (int(LhsArg::Flags)&RowMajorBit)==RowMajorBit };
|
||||
|
||||
public:
|
||||
|
||||
EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
|
||||
EIGEN_STRONG_INLINE InnerIterator(const sparse_conjunction_evaluator& aEval, Index outer)
|
||||
: m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_outer(outer)
|
||||
{}
|
||||
|
||||
@@ -525,19 +584,19 @@ public:
|
||||
|
||||
protected:
|
||||
LhsIterator m_lhsIter;
|
||||
const evaluator<Rhs> &m_rhsEval;
|
||||
const evaluator<RhsArg> &m_rhsEval;
|
||||
const BinaryOp& m_functor;
|
||||
const Index m_outer;
|
||||
};
|
||||
|
||||
|
||||
enum {
|
||||
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
|
||||
CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
|
||||
// Expose storage order of the sparse expression
|
||||
Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit)
|
||||
Flags = (XprType::Flags & ~RowMajorBit) | (int(LhsArg::Flags)&RowMajorBit)
|
||||
};
|
||||
|
||||
explicit binary_evaluator(const XprType& xpr)
|
||||
explicit sparse_conjunction_evaluator(const XprType& xpr)
|
||||
: m_functor(xpr.functor()),
|
||||
m_lhsImpl(xpr.lhs()),
|
||||
m_rhsImpl(xpr.rhs())
|
||||
@@ -552,8 +611,8 @@ public:
|
||||
|
||||
protected:
|
||||
const BinaryOp m_functor;
|
||||
evaluator<Lhs> m_lhsImpl;
|
||||
evaluator<Rhs> m_rhsImpl;
|
||||
evaluator<LhsArg> m_lhsImpl;
|
||||
evaluator<RhsArg> m_rhsImpl;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -22,7 +22,6 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>
|
||||
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
|
||||
|
||||
class InnerIterator;
|
||||
class ReverseInnerIterator;
|
||||
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
|
||||
@@ -41,7 +40,6 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>
|
||||
|
||||
protected:
|
||||
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
|
||||
// typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
|
||||
|
||||
const UnaryOp m_functor;
|
||||
evaluator<ArgType> m_argImpl;
|
||||
@@ -70,33 +68,6 @@ class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::InnerIterat
|
||||
Scalar& valueRef();
|
||||
};
|
||||
|
||||
// template<typename UnaryOp, typename ArgType>
|
||||
// class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::ReverseInnerIterator
|
||||
// : public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalReverseIterator
|
||||
// {
|
||||
// typedef typename XprType::Scalar Scalar;
|
||||
// typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalReverseIterator Base;
|
||||
// public:
|
||||
//
|
||||
// EIGEN_STRONG_INLINE ReverseInnerIterator(const XprType& unaryOp, typename XprType::Index outer)
|
||||
// : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
|
||||
// {}
|
||||
//
|
||||
// EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
|
||||
// { Base::operator--(); return *this; }
|
||||
//
|
||||
// EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
|
||||
//
|
||||
// protected:
|
||||
// const UnaryOp m_functor;
|
||||
// private:
|
||||
// Scalar& valueRef();
|
||||
// };
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
template<typename ViewOp, typename ArgType>
|
||||
struct unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>
|
||||
: public evaluator_base<CwiseUnaryView<ViewOp,ArgType> >
|
||||
@@ -105,7 +76,6 @@ struct unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>
|
||||
typedef CwiseUnaryView<ViewOp, ArgType> XprType;
|
||||
|
||||
class InnerIterator;
|
||||
class ReverseInnerIterator;
|
||||
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<ViewOp>::Cost,
|
||||
@@ -120,7 +90,6 @@ struct unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>
|
||||
|
||||
protected:
|
||||
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
|
||||
// typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
|
||||
|
||||
const ViewOp m_functor;
|
||||
evaluator<ArgType> m_argImpl;
|
||||
@@ -148,29 +117,6 @@ class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::InnerItera
|
||||
const ViewOp m_functor;
|
||||
};
|
||||
|
||||
// template<typename ViewOp, typename ArgType>
|
||||
// class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::ReverseInnerIterator
|
||||
// : public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalReverseIterator
|
||||
// {
|
||||
// typedef typename XprType::Scalar Scalar;
|
||||
// typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalReverseIterator Base;
|
||||
// public:
|
||||
//
|
||||
// EIGEN_STRONG_INLINE ReverseInnerIterator(const XprType& unaryOp, typename XprType::Index outer)
|
||||
// : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
|
||||
// {}
|
||||
//
|
||||
// EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
|
||||
// { Base::operator--(); return *this; }
|
||||
//
|
||||
// EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
|
||||
// EIGEN_STRONG_INLINE Scalar& valueRef() { return m_functor(Base::valueRef()); }
|
||||
//
|
||||
// protected:
|
||||
// const ViewOp m_functor;
|
||||
// };
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<typename Derived>
|
||||
|
||||
@@ -786,30 +786,38 @@ class SparseMatrix
|
||||
EIGEN_DBG_SPARSE(
|
||||
s << "Nonzero entries:\n";
|
||||
if(m.isCompressed())
|
||||
{
|
||||
for (Index i=0; i<m.nonZeros(); ++i)
|
||||
s << "(" << m.m_data.value(i) << "," << m.m_data.index(i) << ") ";
|
||||
}
|
||||
else
|
||||
{
|
||||
for (Index i=0; i<m.outerSize(); ++i)
|
||||
{
|
||||
Index p = m.m_outerIndex[i];
|
||||
Index pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];
|
||||
Index k=p;
|
||||
for (; k<pe; ++k)
|
||||
for (; k<pe; ++k) {
|
||||
s << "(" << m.m_data.value(k) << "," << m.m_data.index(k) << ") ";
|
||||
for (; k<m.m_outerIndex[i+1]; ++k)
|
||||
}
|
||||
for (; k<m.m_outerIndex[i+1]; ++k) {
|
||||
s << "(_,_) ";
|
||||
}
|
||||
}
|
||||
}
|
||||
s << std::endl;
|
||||
s << std::endl;
|
||||
s << "Outer pointers:\n";
|
||||
for (Index i=0; i<m.outerSize(); ++i)
|
||||
for (Index i=0; i<m.outerSize(); ++i) {
|
||||
s << m.m_outerIndex[i] << " ";
|
||||
}
|
||||
s << " $" << std::endl;
|
||||
if(!m.isCompressed())
|
||||
{
|
||||
s << "Inner non zeros:\n";
|
||||
for (Index i=0; i<m.outerSize(); ++i)
|
||||
for (Index i=0; i<m.outerSize(); ++i) {
|
||||
s << m.m_innerNonZeros[i] << " ";
|
||||
}
|
||||
s << " $" << std::endl;
|
||||
}
|
||||
s << std::endl;
|
||||
|
||||
@@ -185,20 +185,27 @@ class Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType
|
||||
EIGEN_SPARSE_PUBLIC_INTERFACE(Ref)
|
||||
|
||||
template<typename Derived>
|
||||
inline Ref(const SparseMatrixBase<Derived>& expr)
|
||||
inline Ref(const SparseMatrixBase<Derived>& expr) : m_hasCopy(false)
|
||||
{
|
||||
construct(expr.derived(), typename Traits::template match<Derived>::type());
|
||||
}
|
||||
|
||||
inline Ref(const Ref& other) : Base(other) {
|
||||
inline Ref(const Ref& other) : Base(other), m_hasCopy(false) {
|
||||
// copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
|
||||
}
|
||||
|
||||
template<typename OtherRef>
|
||||
inline Ref(const RefBase<OtherRef>& other) {
|
||||
inline Ref(const RefBase<OtherRef>& other) : m_hasCopy(false) {
|
||||
construct(other.derived(), typename Traits::template match<OtherRef>::type());
|
||||
}
|
||||
|
||||
~Ref() {
|
||||
if(m_hasCopy) {
|
||||
TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(m_object_bytes);
|
||||
obj->~TPlainObjectType();
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
template<typename Expression>
|
||||
@@ -208,6 +215,7 @@ class Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType
|
||||
{
|
||||
TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(m_object_bytes);
|
||||
::new (obj) TPlainObjectType(expr);
|
||||
m_hasCopy = true;
|
||||
Base::construct(*obj);
|
||||
}
|
||||
else
|
||||
@@ -221,11 +229,13 @@ class Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType
|
||||
{
|
||||
TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(m_object_bytes);
|
||||
::new (obj) TPlainObjectType(expr);
|
||||
m_hasCopy = true;
|
||||
Base::construct(*obj);
|
||||
}
|
||||
|
||||
protected:
|
||||
char m_object_bytes[sizeof(TPlainObjectType)];
|
||||
bool m_hasCopy;
|
||||
};
|
||||
|
||||
|
||||
@@ -293,20 +303,27 @@ class Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType
|
||||
EIGEN_SPARSE_PUBLIC_INTERFACE(Ref)
|
||||
|
||||
template<typename Derived>
|
||||
inline Ref(const SparseMatrixBase<Derived>& expr)
|
||||
inline Ref(const SparseMatrixBase<Derived>& expr) : m_hasCopy(false)
|
||||
{
|
||||
construct(expr.derived(), typename Traits::template match<Derived>::type());
|
||||
}
|
||||
|
||||
inline Ref(const Ref& other) : Base(other) {
|
||||
inline Ref(const Ref& other) : Base(other), m_hasCopy(false) {
|
||||
// copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
|
||||
}
|
||||
|
||||
template<typename OtherRef>
|
||||
inline Ref(const RefBase<OtherRef>& other) {
|
||||
inline Ref(const RefBase<OtherRef>& other) : m_hasCopy(false) {
|
||||
construct(other.derived(), typename Traits::template match<OtherRef>::type());
|
||||
}
|
||||
|
||||
~Ref() {
|
||||
if(m_hasCopy) {
|
||||
TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(m_object_bytes);
|
||||
obj->~TPlainObjectType();
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
template<typename Expression>
|
||||
@@ -320,11 +337,13 @@ class Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType
|
||||
{
|
||||
TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(m_object_bytes);
|
||||
::new (obj) TPlainObjectType(expr);
|
||||
m_hasCopy = true;
|
||||
Base::construct(*obj);
|
||||
}
|
||||
|
||||
protected:
|
||||
char m_object_bytes[sizeof(TPlainObjectType)];
|
||||
bool m_hasCopy;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
@@ -56,7 +56,6 @@ struct unary_evaluator<Transpose<ArgType>, IteratorBased>
|
||||
: public evaluator_base<Transpose<ArgType> >
|
||||
{
|
||||
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
|
||||
typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
|
||||
public:
|
||||
typedef Transpose<ArgType> XprType;
|
||||
|
||||
@@ -75,17 +74,6 @@ struct unary_evaluator<Transpose<ArgType>, IteratorBased>
|
||||
Index col() const { return EvalIterator::row(); }
|
||||
};
|
||||
|
||||
class ReverseInnerIterator : public EvalReverseIterator
|
||||
{
|
||||
public:
|
||||
EIGEN_STRONG_INLINE ReverseInnerIterator(const unary_evaluator& unaryOp, Index outer)
|
||||
: EvalReverseIterator(unaryOp.m_argImpl,outer)
|
||||
{}
|
||||
|
||||
Index row() const { return EvalReverseIterator::col(); }
|
||||
Index col() const { return EvalReverseIterator::row(); }
|
||||
};
|
||||
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
|
||||
Flags = XprType::Flags
|
||||
|
||||
@@ -106,22 +106,22 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
|
||||
|
||||
#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
|
||||
#define WORK(I) \
|
||||
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
|
||||
c1 = pload<Packet>(C1+i+(I)*PacketSize); \
|
||||
KMADD(c0, a0, b00, t0) \
|
||||
KMADD(c1, a0, b01, t1) \
|
||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||
KMADD(c0, a1, b10, t0) \
|
||||
KMADD(c1, a1, b11, t1) \
|
||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||
if(RK==4) KMADD(c0, a2, b20, t0) \
|
||||
if(RK==4) KMADD(c1, a2, b21, t1) \
|
||||
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
|
||||
if(RK==4) KMADD(c0, a3, b30, t0) \
|
||||
if(RK==4) KMADD(c1, a3, b31, t1) \
|
||||
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
|
||||
pstore(C0+i+(I)*PacketSize, c0); \
|
||||
pstore(C1+i+(I)*PacketSize, c1)
|
||||
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
|
||||
c1 = pload<Packet>(C1+i+(I)*PacketSize); \
|
||||
KMADD(c0, a0, b00, t0) \
|
||||
KMADD(c1, a0, b01, t1) \
|
||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||
KMADD(c0, a1, b10, t0) \
|
||||
KMADD(c1, a1, b11, t1) \
|
||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||
if(RK==4){ KMADD(c0, a2, b20, t0) }\
|
||||
if(RK==4){ KMADD(c1, a2, b21, t1) }\
|
||||
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
|
||||
if(RK==4){ KMADD(c0, a3, b30, t0) }\
|
||||
if(RK==4){ KMADD(c1, a3, b31, t1) }\
|
||||
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
|
||||
pstore(C0+i+(I)*PacketSize, c0); \
|
||||
pstore(C1+i+(I)*PacketSize, c1)
|
||||
|
||||
// process rows of A' - C' with aggressive vectorization and peeling
|
||||
for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
|
||||
@@ -131,14 +131,15 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
|
||||
prefetch((A1+i+(5)*PacketSize));
|
||||
if(RK==4) prefetch((A2+i+(5)*PacketSize));
|
||||
if(RK==4) prefetch((A3+i+(5)*PacketSize));
|
||||
WORK(0);
|
||||
WORK(1);
|
||||
WORK(2);
|
||||
WORK(3);
|
||||
WORK(4);
|
||||
WORK(5);
|
||||
WORK(6);
|
||||
WORK(7);
|
||||
|
||||
WORK(0);
|
||||
WORK(1);
|
||||
WORK(2);
|
||||
WORK(3);
|
||||
WORK(4);
|
||||
WORK(5);
|
||||
WORK(6);
|
||||
WORK(7);
|
||||
}
|
||||
// process the remaining rows with vectorization only
|
||||
for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
|
||||
@@ -203,16 +204,16 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
|
||||
}
|
||||
|
||||
#define WORK(I) \
|
||||
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
|
||||
KMADD(c0, a0, b00, t0) \
|
||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||
KMADD(c0, a1, b10, t0) \
|
||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||
if(RK==4) KMADD(c0, a2, b20, t0) \
|
||||
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
|
||||
if(RK==4) KMADD(c0, a3, b30, t0) \
|
||||
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
|
||||
pstore(C0+i+(I)*PacketSize, c0);
|
||||
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
|
||||
KMADD(c0, a0, b00, t0) \
|
||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||
KMADD(c0, a1, b10, t0) \
|
||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||
if(RK==4){ KMADD(c0, a2, b20, t0) }\
|
||||
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
|
||||
if(RK==4){ KMADD(c0, a3, b30, t0) }\
|
||||
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
|
||||
pstore(C0+i+(I)*PacketSize, c0);
|
||||
|
||||
// agressive vectorization and peeling
|
||||
for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
|
||||
|
||||
@@ -269,44 +269,6 @@ const CwiseBinaryOp<internal::scalar_difference_op<T,Scalar>,Constant<T>,Derived
|
||||
operator/(const T& s,const StorageBaseType& a);
|
||||
#endif
|
||||
|
||||
/** \returns an expression of the coefficient-wise && operator of *this and \a other
|
||||
*
|
||||
* \warning this operator is for expression of bool only.
|
||||
*
|
||||
* Example: \include Cwise_boolean_and.cpp
|
||||
* Output: \verbinclude Cwise_boolean_and.out
|
||||
*
|
||||
* \sa operator||(), select()
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
|
||||
operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
|
||||
return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise || operator of *this and \a other
|
||||
*
|
||||
* \warning this operator is for expression of bool only.
|
||||
*
|
||||
* Example: \include Cwise_boolean_or.cpp
|
||||
* Output: \verbinclude Cwise_boolean_or.out
|
||||
*
|
||||
* \sa operator&&(), select()
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
|
||||
operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
|
||||
return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise ^ operator of *this and \a other
|
||||
*
|
||||
* \warning this operator is for expression of bool only.
|
||||
|
||||
@@ -75,3 +75,41 @@ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)
|
||||
template<typename T>
|
||||
const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;
|
||||
#endif
|
||||
|
||||
/** \returns an expression of the coefficient-wise boolean \b and operator of \c *this and \a other
|
||||
*
|
||||
* \warning this operator is for expression of bool only.
|
||||
*
|
||||
* Example: \include Cwise_boolean_and.cpp
|
||||
* Output: \verbinclude Cwise_boolean_and.out
|
||||
*
|
||||
* \sa operator||(), select()
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
|
||||
operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
|
||||
return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise boolean \b or operator of \c *this and \a other
|
||||
*
|
||||
* \warning this operator is for expression of bool only.
|
||||
*
|
||||
* Example: \include Cwise_boolean_or.cpp
|
||||
* Output: \verbinclude Cwise_boolean_or.out
|
||||
*
|
||||
* \sa operator&&(), select()
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
|
||||
operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
|
||||
return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
|
||||
}
|
||||
|
||||
@@ -1,28 +1,21 @@
|
||||
# -*- cmake -*-
|
||||
#
|
||||
# Eigen3Config.cmake(.in)
|
||||
# This file exports the Eigen3::Eigen CMake target which should be passed to the
|
||||
# target_link_libraries command.
|
||||
|
||||
# Use the following variables to compile and link against Eigen:
|
||||
# EIGEN3_FOUND - True if Eigen was found on your system
|
||||
# EIGEN3_USE_FILE - The file making Eigen usable
|
||||
# EIGEN3_DEFINITIONS - Definitions needed to build with Eigen
|
||||
# EIGEN3_INCLUDE_DIR - Directory where signature_of_eigen3_matrix_library can be found
|
||||
# EIGEN3_INCLUDE_DIRS - List of directories of Eigen and it's dependencies
|
||||
# EIGEN3_ROOT_DIR - The base directory of Eigen
|
||||
# EIGEN3_VERSION_STRING - A human-readable string containing the version
|
||||
# EIGEN3_VERSION_MAJOR - The major version of Eigen
|
||||
# EIGEN3_VERSION_MINOR - The minor version of Eigen
|
||||
# EIGEN3_VERSION_PATCH - The patch version of Eigen
|
||||
@PACKAGE_INIT@
|
||||
|
||||
set ( EIGEN3_FOUND 1 )
|
||||
set ( EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake" )
|
||||
include ("${CMAKE_CURRENT_LIST_DIR}/Eigen3Targets.cmake")
|
||||
|
||||
set ( EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@" )
|
||||
set ( EIGEN3_INCLUDE_DIR "@EIGEN_INCLUDE_DIR@" )
|
||||
set ( EIGEN3_INCLUDE_DIRS "@EIGEN_INCLUDE_DIRS@" )
|
||||
set ( EIGEN3_ROOT_DIR "@EIGEN_ROOT_DIR@" )
|
||||
# Legacy variables, do *not* use. May be removed in the future.
|
||||
|
||||
set ( EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@" )
|
||||
set ( EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@" )
|
||||
set ( EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@" )
|
||||
set ( EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@" )
|
||||
set (EIGEN3_FOUND 1)
|
||||
set (EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake")
|
||||
|
||||
set (EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@")
|
||||
set (EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@")
|
||||
set (EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@")
|
||||
set (EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@")
|
||||
|
||||
set (EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@")
|
||||
set (EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@")
|
||||
set (EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@")
|
||||
set (EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@")
|
||||
|
||||
30
cmake/Eigen3ConfigLegacy.cmake.in
Normal file
30
cmake/Eigen3ConfigLegacy.cmake.in
Normal file
@@ -0,0 +1,30 @@
|
||||
# -*- cmake -*-
|
||||
#
|
||||
# Eigen3Config.cmake(.in)
|
||||
|
||||
# Use the following variables to compile and link against Eigen:
|
||||
# EIGEN3_FOUND - True if Eigen was found on your system
|
||||
# EIGEN3_USE_FILE - The file making Eigen usable
|
||||
# EIGEN3_DEFINITIONS - Definitions needed to build with Eigen
|
||||
# EIGEN3_INCLUDE_DIR - Directory where signature_of_eigen3_matrix_library can be found
|
||||
# EIGEN3_INCLUDE_DIRS - List of directories of Eigen and it's dependencies
|
||||
# EIGEN3_ROOT_DIR - The base directory of Eigen
|
||||
# EIGEN3_VERSION_STRING - A human-readable string containing the version
|
||||
# EIGEN3_VERSION_MAJOR - The major version of Eigen
|
||||
# EIGEN3_VERSION_MINOR - The minor version of Eigen
|
||||
# EIGEN3_VERSION_PATCH - The patch version of Eigen
|
||||
|
||||
@PACKAGE_INIT@
|
||||
|
||||
set ( EIGEN3_FOUND 1 )
|
||||
set ( EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake" )
|
||||
|
||||
set ( EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@" )
|
||||
set ( EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@" )
|
||||
set ( EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@" )
|
||||
set ( EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@" )
|
||||
|
||||
set ( EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@" )
|
||||
set ( EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@" )
|
||||
set ( EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@" )
|
||||
set ( EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@" )
|
||||
@@ -26,6 +26,7 @@ namespace Eigen {
|
||||
- \subpage TopicPitfalls
|
||||
- \subpage TopicTemplateKeyword
|
||||
- \subpage UserManual_UnderstandingEigen
|
||||
- \subpage TopicCMakeGuide
|
||||
*/
|
||||
|
||||
/** \page UserManual_UnderstandingEigen Understanding Eigen
|
||||
|
||||
@@ -97,31 +97,35 @@ run time. However, these assertions do cost time and can thus be turned off.
|
||||
|
||||
\section TopicPreprocessorDirectivesPerformance Alignment, vectorization and performance tweaking
|
||||
|
||||
- \b EIGEN_MALLOC_ALREADY_ALIGNED - Can be set to 0 or 1 to tell whether default system \c malloc already
|
||||
- \b \c EIGEN_MALLOC_ALREADY_ALIGNED - Can be set to 0 or 1 to tell whether default system \c malloc already
|
||||
returns aligned buffers. In not defined, then this information is automatically deduced from the compiler
|
||||
and system preprocessor tokens.
|
||||
- \b EIGEN_DONT_ALIGN - disables alignment completely. %Eigen will not try to align its objects and does not
|
||||
expect that any objects passed to it are aligned. This will turn off vectorization. Not defined by default.
|
||||
- \b EIGEN_DONT_ALIGN_STATICALLY - disables alignment of arrays on the stack. Not defined by default, unless
|
||||
\c EIGEN_DONT_ALIGN is defined.
|
||||
- \b EIGEN_DONT_PARALLELIZE - if defined, this disables multi-threading. This is only relevant if you enabled OpenMP.
|
||||
- \b \c EIGEN_MAX_ALIGN_BYTES - Must be a power of two, or 0. Defines an upper bound on the memory boundary in bytes on which dynamically and statically allocated data may be aligned by %Eigen. If not defined, a default value is automatically computed based on architecture, compiler, and OS.
|
||||
This option is typically used to enforce binary compatibility between code/libraries compiled with different SIMD options. For instance, one may compile AVX code and enforce ABI compatibility with existing SSE code by defining \c EIGEN_MAX_ALIGN_BYTES=16. In the other way round, since by default AVX implies 32 bytes alignment for best performance, one can compile SSE code to be ABI compatible with AVX code by defining \c EIGEN_MAX_ALIGN_BYTES=32.
|
||||
- \b \c EIGEN_MAX_STATIC_ALIGN_BYTES - Same as \c EIGEN_MAX_ALIGN_BYTES but for statically allocated data only. By default, if only \c EIGEN_MAX_ALIGN_BYTES is defined, then \c EIGEN_MAX_STATIC_ALIGN_BYTES == \c EIGEN_MAX_ALIGN_BYTES, otherwise a default value is automatically computed based on architecture, compiler, and OS (can be smaller than the default value of EIGEN_MAX_ALIGN_BYTES on architectures that do not support stack alignment).
|
||||
Let us emphasize that \c EIGEN_MAX_*_ALIGN_BYTES define only a diserable upper bound. In practice data is aligned to largest power-of-two common divisor of \c EIGEN_MAX_STATIC_ALIGN_BYTES and the size of the data, such that memory is not wasted.
|
||||
- \b \c EIGEN_DONT_PARALLELIZE - if defined, this disables multi-threading. This is only relevant if you enabled OpenMP.
|
||||
See \ref TopicMultiThreading for details.
|
||||
- \b EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless
|
||||
alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN.
|
||||
- \b EIGEN_UNALIGNED_VECTORIZE - disables/enables vectorization with unaligned stores. Default is 1 (enabled).
|
||||
- \b \c EIGEN_UNALIGNED_VECTORIZE - disables/enables vectorization with unaligned stores. Default is 1 (enabled).
|
||||
If set to 0 (disabled), then expression for which the destination cannot be aligned are not vectorized (e.g., unaligned
|
||||
small fixed size vectors or matrices)
|
||||
- \b EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently
|
||||
- \b \c EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently
|
||||
enables the SSE vectorization of sin() and cos(), and speedups sqrt() for single precision. Defined to 1 by default.
|
||||
Define it to 0 to disable.
|
||||
- \b EIGEN_UNROLLING_LIMIT - defines the size of a loop to enable meta unrolling. Set it to zero to disable
|
||||
- \b \c EIGEN_UNROLLING_LIMIT - defines the size of a loop to enable meta unrolling. Set it to zero to disable
|
||||
unrolling. The size of a loop here is expressed in %Eigen's own notion of "number of FLOPS", it does not
|
||||
correspond to the number of iterations or the number of instructions. The default is value 100.
|
||||
- \b EIGEN_STACK_ALLOCATION_LIMIT - defines the maximum bytes for a buffer to be allocated on the stack. For internal
|
||||
- \b \c EIGEN_STACK_ALLOCATION_LIMIT - defines the maximum bytes for a buffer to be allocated on the stack. For internal
|
||||
temporary buffers, dynamic memory allocation is employed as a fall back. For fixed-size matrices or arrays, exceeding
|
||||
this threshold raises a compile time assertion. Use 0 to set no limit. Default is 128 KB.
|
||||
|
||||
|
||||
- \c EIGEN_DONT_ALIGN - Deprecated, it is a synonym for \c EIGEN_MAX_ALIGN_BYTES=0. It disables alignment completely. %Eigen will not try to align its objects and does not expect that any objects passed to it are aligned. This will turn off vectorization if \b EIGEN_UNALIGNED_VECTORIZE=1. Not defined by default.
|
||||
- \c EIGEN_DONT_ALIGN_STATICALLY - Deprecated, it is a synonym for \c EIGEN_MAX_STATIC_ALIGN_BYTES=0. It disables alignment of arrays on the stack. Not defined by default, unless \c EIGEN_DONT_ALIGN is defined.
|
||||
|
||||
|
||||
\section TopicPreprocessorDirectivesPlugins Plugins
|
||||
|
||||
It is possible to add new methods to many fundamental classes in %Eigen by writing a plugin. As explained in
|
||||
|
||||
52
doc/TopicCMakeGuide.dox
Normal file
52
doc/TopicCMakeGuide.dox
Normal file
@@ -0,0 +1,52 @@
|
||||
namespace Eigen {
|
||||
|
||||
/**
|
||||
|
||||
\page TopicCMakeGuide Using %Eigen in CMake Projects
|
||||
|
||||
%Eigen provides native CMake support which allows the library to be easily
|
||||
used in CMake projects.
|
||||
|
||||
\note %CMake 3.0 (or later) is required to enable this functionality.
|
||||
|
||||
%Eigen exports a CMake target called `Eigen3::Eigen` which can be imported
|
||||
using the `find_package` CMake command and used by calling
|
||||
`target_link_libraries` as in the following example:
|
||||
\code{.cmake}
|
||||
cmake_minimum_required (VERSION 3.0)
|
||||
project (myproject)
|
||||
|
||||
find_package (Eigen3 3.3 REQUIRED NO_MODULE)
|
||||
|
||||
add_executable (example example.cpp)
|
||||
target_link_libraries (example Eigen3::Eigen)
|
||||
\endcode
|
||||
|
||||
The above code snippet must be placed in a file called `CMakeLists.txt` alongside
|
||||
`example.cpp`. After running
|
||||
\code{.sh}
|
||||
$ cmake path-to-example-directory
|
||||
\endcode
|
||||
CMake will produce project files that generate an executable called `example`
|
||||
which requires at least version 3.3 of %Eigen. Here, `path-to-example-directory`
|
||||
is the path to the directory that contains both `CMakeLists.txt` and
|
||||
`example.cpp`.
|
||||
|
||||
If you have multiple installed version of %Eigen, you can pick your favorite one by setting the \c Eigen3_DIR cmake's variable to the respective path containing the \c Eigen3*.cmake files. For instance:
|
||||
\code
|
||||
cmake path-to-example-directory -DEigen3_DIR=$HOME/mypackages/share/eigen3/cmake/
|
||||
\endcode
|
||||
|
||||
If the `REQUIRED` option is omitted when locating %Eigen using
|
||||
`find_package`, one can check whether the package was found as follows:
|
||||
\code{.cmake}
|
||||
find_package (Eigen3 3.3 NO_MODULE)
|
||||
|
||||
if (TARGET Eigen3::Eigen)
|
||||
# Use the imported target
|
||||
endif (TARGET Eigen3::Eigen)
|
||||
\endcode
|
||||
|
||||
*/
|
||||
|
||||
}
|
||||
@@ -134,6 +134,12 @@ template<typename MatrixType> void comparisons(const MatrixType& m)
|
||||
// count
|
||||
VERIFY(((m1.array().abs()+1)>RealScalar(0.1)).count() == rows*cols);
|
||||
|
||||
// and/or
|
||||
VERIFY( ((m1.array()<RealScalar(0)).matrix() && (m1.array()>RealScalar(0)).matrix()).count() == 0);
|
||||
VERIFY( ((m1.array()<RealScalar(0)).matrix() || (m1.array()>=RealScalar(0)).matrix()).count() == rows*cols);
|
||||
RealScalar a = m1.cwiseAbs().mean();
|
||||
VERIFY( ((m1.array()<-a).matrix() || (m1.array()>a).matrix()).count() == (m1.cwiseAbs().array()>a).count());
|
||||
|
||||
typedef Matrix<typename MatrixType::Index, Dynamic, 1> VectorOfIndices;
|
||||
|
||||
// TODO allows colwise/rowwise for array
|
||||
|
||||
@@ -90,7 +90,7 @@ inline void on_temporary_creation(long int size) {
|
||||
#define VERIFY_EVALUATION_COUNT(XPR,N) {\
|
||||
nb_temporaries = 0; \
|
||||
XPR; \
|
||||
if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \
|
||||
if(nb_temporaries!=N) { std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; }\
|
||||
VERIFY( (#XPR) && nb_temporaries==N ); \
|
||||
}
|
||||
|
||||
|
||||
@@ -591,7 +591,7 @@ template<typename Scalar> void packetmath_scatter_gather()
|
||||
int stride = internal::random<int>(1,20);
|
||||
|
||||
EIGEN_ALIGN_MAX Scalar buffer[PacketSize*20];
|
||||
memset(buffer, 0, 20*sizeof(Packet));
|
||||
memset(buffer, 0, 20*PacketSize*sizeof(Scalar));
|
||||
Packet packet = internal::pload<Packet>(data1);
|
||||
internal::pscatter<Scalar, Packet>(buffer, packet, stride);
|
||||
|
||||
|
||||
@@ -98,6 +98,16 @@ template<typename MatrixType> void product_extra(const MatrixType& m)
|
||||
// regression test
|
||||
MatrixType tmp = m1 * m1.adjoint() * s1;
|
||||
VERIFY_IS_APPROX(tmp, m1 * m1.adjoint() * s1);
|
||||
|
||||
// regression test for bug 1343, assignment to arrays
|
||||
Array<Scalar,Dynamic,1> a1 = m1 * vc2;
|
||||
VERIFY_IS_APPROX(a1.matrix(),m1*vc2);
|
||||
Array<Scalar,Dynamic,1> a2 = s1 * (m1 * vc2);
|
||||
VERIFY_IS_APPROX(a2.matrix(),s1*m1*vc2);
|
||||
Array<Scalar,1,Dynamic> a3 = v1 * m1;
|
||||
VERIFY_IS_APPROX(a3.matrix(),v1*m1);
|
||||
Array<Scalar,Dynamic,Dynamic> a4 = m1 * m2.adjoint();
|
||||
VERIFY_IS_APPROX(a4.matrix(),m1*m2.adjoint());
|
||||
}
|
||||
|
||||
// Regression test for bug reported at http://forum.kde.org/viewtopic.php?f=74&t=96947
|
||||
|
||||
@@ -136,6 +136,10 @@ template<typename MatrixType> void product_notemporary(const MatrixType& m)
|
||||
VERIFY_EVALUATION_COUNT( rm3.noalias() -= (cv1) * (rv1 * m1), 1 );
|
||||
VERIFY_EVALUATION_COUNT( rm3.noalias() = (m1*cv1) * (rv1 * m1), 2 );
|
||||
VERIFY_EVALUATION_COUNT( rm3.noalias() += (m1*cv1) * (rv1 * m1), 2 );
|
||||
|
||||
// Check nested products
|
||||
VERIFY_EVALUATION_COUNT( cvres.noalias() = m1.adjoint() * m1 * cv1, 1 );
|
||||
VERIFY_EVALUATION_COUNT( rvres.noalias() = rv1 * (m1 * m2.adjoint()), 1 );
|
||||
}
|
||||
|
||||
void test_product_notemporary()
|
||||
|
||||
@@ -21,7 +21,9 @@ template<typename MatrixType> void selfadjoint(const MatrixType& m)
|
||||
Index cols = m.cols();
|
||||
|
||||
MatrixType m1 = MatrixType::Random(rows, cols),
|
||||
m3(rows, cols);
|
||||
m2 = MatrixType::Random(rows, cols),
|
||||
m3(rows, cols),
|
||||
m4(rows, cols);
|
||||
|
||||
m1.diagonal() = m1.diagonal().real().template cast<Scalar>();
|
||||
|
||||
@@ -30,10 +32,19 @@ template<typename MatrixType> void selfadjoint(const MatrixType& m)
|
||||
VERIFY_IS_APPROX(MatrixType(m3.template triangularView<Upper>()), MatrixType(m1.template triangularView<Upper>()));
|
||||
VERIFY_IS_APPROX(m3, m3.adjoint());
|
||||
|
||||
|
||||
m3 = m1.template selfadjointView<Lower>();
|
||||
VERIFY_IS_APPROX(MatrixType(m3.template triangularView<Lower>()), MatrixType(m1.template triangularView<Lower>()));
|
||||
VERIFY_IS_APPROX(m3, m3.adjoint());
|
||||
|
||||
m3 = m1.template selfadjointView<Upper>();
|
||||
m4 = m2;
|
||||
m4 += m1.template selfadjointView<Upper>();
|
||||
VERIFY_IS_APPROX(m4, m2+m3);
|
||||
|
||||
m3 = m1.template selfadjointView<Lower>();
|
||||
m4 = m2;
|
||||
m4 -= m1.template selfadjointView<Lower>();
|
||||
VERIFY_IS_APPROX(m4, m2-m3);
|
||||
}
|
||||
|
||||
void bug_159()
|
||||
|
||||
@@ -217,6 +217,51 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
|
||||
refM1(it.row(), it.col()) += s1;
|
||||
VERIFY_IS_APPROX(m1, refM1);
|
||||
}
|
||||
|
||||
// and/or
|
||||
{
|
||||
typedef SparseMatrix<bool, SparseMatrixType::Options, typename SparseMatrixType::StorageIndex> SpBool;
|
||||
SpBool mb1 = m1.real().template cast<bool>();
|
||||
SpBool mb2 = m2.real().template cast<bool>();
|
||||
VERIFY_IS_EQUAL(mb1.template cast<int>().sum(), refM1.real().template cast<bool>().count());
|
||||
VERIFY_IS_EQUAL((mb1 && mb2).template cast<int>().sum(), (refM1.real().template cast<bool>() && refM2.real().template cast<bool>()).count());
|
||||
VERIFY_IS_EQUAL((mb1 || mb2).template cast<int>().sum(), (refM1.real().template cast<bool>() || refM2.real().template cast<bool>()).count());
|
||||
SpBool mb3 = mb1 && mb2;
|
||||
if(mb1.coeffs().all() && mb2.coeffs().all())
|
||||
{
|
||||
VERIFY_IS_EQUAL(mb3.nonZeros(), (refM1.real().template cast<bool>() && refM2.real().template cast<bool>()).count());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// test reverse iterators
|
||||
{
|
||||
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
|
||||
SparseMatrixType m2(rows, cols);
|
||||
initSparse<Scalar>(density, refMat2, m2);
|
||||
std::vector<Scalar> ref_value(m2.innerSize());
|
||||
std::vector<Index> ref_index(m2.innerSize());
|
||||
if(internal::random<bool>())
|
||||
m2.makeCompressed();
|
||||
for(Index j = 0; j<m2.outerSize(); ++j)
|
||||
{
|
||||
Index count_forward = 0;
|
||||
|
||||
for(typename SparseMatrixType::InnerIterator it(m2,j); it; ++it)
|
||||
{
|
||||
ref_value[ref_value.size()-1-count_forward] = it.value();
|
||||
ref_index[ref_index.size()-1-count_forward] = it.index();
|
||||
count_forward++;
|
||||
}
|
||||
Index count_reverse = 0;
|
||||
for(typename SparseMatrixType::ReverseInnerIterator it(m2,j); it; --it)
|
||||
{
|
||||
VERIFY_IS_APPROX( std::abs(ref_value[ref_value.size()-count_forward+count_reverse])+1, std::abs(it.value())+1);
|
||||
VERIFY_IS_EQUAL( ref_index[ref_index.size()-count_forward+count_reverse] , it.index());
|
||||
count_reverse++;
|
||||
}
|
||||
VERIFY_IS_EQUAL(count_forward, count_reverse);
|
||||
}
|
||||
}
|
||||
|
||||
// test transpose
|
||||
|
||||
@@ -223,6 +223,33 @@ template<typename SparseMatrixType> void sparse_block(const SparseMatrixType& re
|
||||
|
||||
VERIFY_IS_APPROX(m2.block(r0,c0,r1,c1), refMat2.block(r0,c0,r1,c1));
|
||||
VERIFY_IS_APPROX((2*m2).block(r0,c0,r1,c1), (2*refMat2).block(r0,c0,r1,c1));
|
||||
|
||||
if(m2.nonZeros()>0)
|
||||
{
|
||||
VERIFY_IS_APPROX(m2, refMat2);
|
||||
SparseMatrixType m3(rows, cols);
|
||||
DenseMatrix refMat3(rows, cols); refMat3.setZero();
|
||||
Index n = internal::random<Index>(1,10);
|
||||
for(Index k=0; k<n; ++k)
|
||||
{
|
||||
Index o1 = internal::random<Index>(0,outer-1);
|
||||
Index o2 = internal::random<Index>(0,outer-1);
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
{
|
||||
m3.innerVector(o1) = m2.row(o2);
|
||||
refMat3.row(o1) = refMat2.row(o2);
|
||||
}
|
||||
else
|
||||
{
|
||||
m3.innerVector(o1) = m2.col(o2);
|
||||
refMat3.col(o1) = refMat2.col(o2);
|
||||
}
|
||||
if(internal::random<bool>())
|
||||
m3.makeCompressed();
|
||||
}
|
||||
if(m3.nonZeros()>0)
|
||||
VERIFY_IS_APPROX(m3, refMat3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user