Fix realview

libeigen/eigen!2062
This commit is contained in:
Charles Schlosser
2025-11-08 13:36:43 +00:00
parent ffcd7bdbd6
commit 8b85f5933a
6 changed files with 297 additions and 132 deletions

View File

@@ -367,11 +367,11 @@ class DenseBase
EIGEN_DEVICE_FUNC inline bool allFinite() const;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const Scalar& other);
template <bool Enable = !internal::is_same<Scalar, RealScalar>::value, typename = std::enable_if_t<Enable>>
template <bool Enable = internal::complex_array_access<Scalar>::value, typename = std::enable_if_t<Enable>>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const RealScalar& other);
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const Scalar& other);
template <bool Enable = !internal::is_same<Scalar, RealScalar>::value, typename = std::enable_if_t<Enable>>
template <bool Enable = internal::complex_array_access<Scalar>::value, typename = std::enable_if_t<Enable>>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const RealScalar& other);
typedef internal::add_const_on_value_type_t<typename internal::eval<Derived>::type> EvalReturnType;

View File

@@ -20,10 +20,7 @@ namespace internal {
template <typename Derived, typename Scalar = typename traits<Derived>::Scalar>
struct squared_norm_impl {
using Real = typename NumTraits<Scalar>::Real;
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Real run(const Derived& a) {
Scalar result = a.unaryExpr(squared_norm_functor<Scalar>()).sum();
return numext::real(result) + numext::imag(result);
}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Real run(const Derived& a) { return a.realView().cwiseAbs2().sum(); }
};
template <typename Derived>

View File

@@ -17,20 +17,16 @@ namespace Eigen {
namespace internal {
// Vectorized assignment to RealView requires array-oriented access to the real and imaginary components.
// Write access and vectorization requires array-oriented access to the real and imaginary components.
// From https://en.cppreference.com/w/cpp/numeric/complex.html:
// For any pointer to an element of an array of std::complex<T> named p and any valid array index i,
// reinterpret_cast<T*>(p)[2 * i] is the real part of the complex number p[i], and
// reinterpret_cast<T*>(p)[2 * i + 1] is the imaginary part of the complex number p[i].
template <typename ComplexScalar>
template <typename T>
struct complex_array_access : std::false_type {};
template <>
struct complex_array_access<std::complex<float>> : std::true_type {};
template <>
struct complex_array_access<std::complex<double>> : std::true_type {};
template <>
struct complex_array_access<std::complex<long double>> : std::true_type {};
template <typename T>
struct complex_array_access<std::complex<T>> : std::true_type {};
template <typename Xpr>
struct traits<RealView<Xpr>> : public traits<Xpr> {
@@ -40,13 +36,17 @@ struct traits<RealView<Xpr>> : public traits<Xpr> {
if (size_as_int == Dynamic) return Dynamic;
return times_two ? (2 * size_as_int) : size_as_int;
}
using Base = traits<Xpr>;
using ComplexScalar = typename Base::Scalar;
using Scalar = typename NumTraits<ComplexScalar>::Real;
static constexpr int ActualDirectAccessBit = complex_array_access<ComplexScalar>::value ? DirectAccessBit : 0;
static constexpr bool ArrayAccess = complex_array_access<ComplexScalar>::value;
static constexpr int ActualDirectAccessBit = ArrayAccess ? DirectAccessBit : 0;
static constexpr int ActualLvaluebit = !std::is_const<Xpr>::value && ArrayAccess ? LvalueBit : 0;
static constexpr int ActualPacketAccessBit = packet_traits<Scalar>::Vectorizable ? PacketAccessBit : 0;
static constexpr int FlagMask =
ActualDirectAccessBit | ActualPacketAccessBit | HereditaryBits | LinearAccessBit | LvalueBit;
ActualDirectAccessBit | ActualLvaluebit | ActualPacketAccessBit | HereditaryBits | LinearAccessBit;
static constexpr int BaseFlags = int(evaluator<Xpr>::Flags) | int(Base::Flags);
static constexpr int Flags = BaseFlags & FlagMask;
static constexpr bool IsRowMajor = Flags & RowMajorBit;
@@ -66,68 +66,84 @@ struct evaluator<RealView<Xpr>> : private evaluator<Xpr> {
using XprType = RealView<Xpr>;
using ExpressionTraits = traits<XprType>;
using ComplexScalar = typename ExpressionTraits::ComplexScalar;
using ComplexCoeffReturnType = typename BaseEvaluator::CoeffReturnType;
using Scalar = typename ExpressionTraits::Scalar;
static constexpr bool IsRowMajor = ExpressionTraits::IsRowMajor;
static constexpr int Flags = ExpressionTraits::Flags;
static constexpr int CoeffReadCost = BaseEvaluator::CoeffReadCost;
static constexpr int Alignment = BaseEvaluator::Alignment;
static constexpr bool IsRowMajor = ExpressionTraits::IsRowMajor;
static constexpr bool DirectAccess = Flags & DirectAccessBit;
using ComplexCoeffReturnType = std::conditional_t<DirectAccess, const ComplexScalar&, ComplexScalar>;
using CoeffReturnType = std::conditional_t<DirectAccess, const Scalar&, Scalar>;
EIGEN_DEVICE_FUNC explicit evaluator(XprType realView) : BaseEvaluator(realView.m_xpr) {}
template <bool Enable = std::is_reference<ComplexCoeffReturnType>::value, typename = std::enable_if_t<!Enable>>
template <bool Enable = DirectAccess, std::enable_if_t<!Enable, bool> = true>
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const {
ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(IsRowMajor ? row : row / 2, IsRowMajor ? col / 2 : col);
Index p = (IsRowMajor ? col : row) & 1;
return p ? numext::real(cscalar) : numext::imag(cscalar);
Index r = IsRowMajor ? row : row / 2;
Index c = IsRowMajor ? col / 2 : col;
bool p = (IsRowMajor ? col : row) & 1;
ComplexScalar ccoeff = BaseEvaluator::coeff(r, c);
return p ? numext::imag(ccoeff) : numext::real(ccoeff);
}
template <bool Enable = std::is_reference<ComplexCoeffReturnType>::value, typename = std::enable_if_t<Enable>>
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index row, Index col) const {
ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(IsRowMajor ? row : row / 2, IsRowMajor ? col / 2 : col);
template <bool Enable = DirectAccess, std::enable_if_t<Enable, bool> = true>
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const {
Index r = IsRowMajor ? row : row / 2;
Index c = IsRowMajor ? col / 2 : col;
Index p = (IsRowMajor ? col : row) & 1;
return reinterpret_cast<const Scalar(&)[2]>(cscalar)[p];
ComplexCoeffReturnType ccoeff = BaseEvaluator::coeff(r, c);
return reinterpret_cast<const Scalar(&)[2]>(ccoeff)[p];
}
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
ComplexScalar& cscalar = BaseEvaluator::coeffRef(IsRowMajor ? row : row / 2, IsRowMajor ? col / 2 : col);
Index p = (IsRowMajor ? col : row) & 1;
return reinterpret_cast<Scalar(&)[2]>(cscalar)[p];
}
template <bool Enable = std::is_reference<ComplexCoeffReturnType>::value, typename = std::enable_if_t<!Enable>>
template <bool Enable = DirectAccess, std::enable_if_t<!Enable, bool> = true>
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const {
ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(index / 2);
Index p = index & 1;
return p ? numext::real(cscalar) : numext::imag(cscalar);
ComplexScalar ccoeff = BaseEvaluator::coeff(index / 2);
bool p = index & 1;
return p ? numext::imag(ccoeff) : numext::real(ccoeff);
}
template <bool Enable = std::is_reference<ComplexCoeffReturnType>::value, typename = std::enable_if_t<Enable>>
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const {
ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(index / 2);
template <bool Enable = DirectAccess, std::enable_if_t<Enable, bool> = true>
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
ComplexCoeffReturnType ccoeff = BaseEvaluator::coeff(index / 2);
Index p = index & 1;
return reinterpret_cast<const Scalar(&)[2]>(cscalar)[p];
return reinterpret_cast<const Scalar(&)[2]>(ccoeff)[p];
}
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
Index r = IsRowMajor ? row : row / 2;
Index c = IsRowMajor ? col / 2 : col;
Index p = (IsRowMajor ? col : row) & 1;
ComplexScalar& ccoeffRef = BaseEvaluator::coeffRef(r, c);
return reinterpret_cast<Scalar(&)[2]>(ccoeffRef)[p];
}
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
ComplexScalar& cscalar = BaseEvaluator::coeffRef(index / 2);
ComplexScalar& ccoeffRef = BaseEvaluator::coeffRef(index / 2);
Index p = index & 1;
return reinterpret_cast<Scalar(&)[2]>(cscalar)[p];
return reinterpret_cast<Scalar(&)[2]>(ccoeffRef)[p];
}
// If the first index is odd (imaginary), discard the first scalar
// in 'result' and assign the missing scalar.
// This operation is safe as the real component of the first scalar must exist.
template <int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
constexpr int RealPacketSize = unpacket_traits<PacketType>::size;
using ComplexPacket = typename find_packet_by_size<ComplexScalar, RealPacketSize / 2>::type;
EIGEN_STATIC_ASSERT((find_packet_by_size<ComplexScalar, RealPacketSize / 2>::value),
MISSING COMPATIBLE COMPLEX PACKET TYPE)
eigen_assert(((IsRowMajor ? col : row) % 2 == 0) && "the inner index must be even");
Index crow = IsRowMajor ? row : row / 2;
Index ccol = IsRowMajor ? col / 2 : col;
ComplexPacket cpacket = BaseEvaluator::template packet<LoadMode, ComplexPacket>(crow, ccol);
return preinterpret<PacketType, ComplexPacket>(cpacket);
Index r = IsRowMajor ? row : row / 2;
Index c = IsRowMajor ? col / 2 : col;
bool p = (IsRowMajor ? col : row) & 1;
ComplexPacket cresult = BaseEvaluator::template packet<LoadMode, ComplexPacket>(r, c);
PacketType result = preinterpret<PacketType>(cresult);
if (p) {
Scalar aux[RealPacketSize + 1];
pstoreu(aux, result);
Index lastr = IsRowMajor ? row : row + RealPacketSize - 1;
Index lastc = IsRowMajor ? col + RealPacketSize - 1 : col;
aux[RealPacketSize] = coeff(lastr, lastc);
result = ploadu<PacketType>(aux + 1);
}
return result;
}
template <int LoadMode, typename PacketType>
@@ -136,28 +152,48 @@ struct evaluator<RealView<Xpr>> : private evaluator<Xpr> {
using ComplexPacket = typename find_packet_by_size<ComplexScalar, RealPacketSize / 2>::type;
EIGEN_STATIC_ASSERT((find_packet_by_size<ComplexScalar, RealPacketSize / 2>::value),
MISSING COMPATIBLE COMPLEX PACKET TYPE)
eigen_assert((index % 2 == 0) && "the index must be even");
Index cindex = index / 2;
ComplexPacket cpacket = BaseEvaluator::template packet<LoadMode, ComplexPacket>(cindex);
return preinterpret<PacketType, ComplexPacket>(cpacket);
ComplexPacket cresult = BaseEvaluator::template packet<LoadMode, ComplexPacket>(index / 2);
PacketType result = preinterpret<PacketType>(cresult);
bool p = index & 1;
if (p) {
Scalar aux[RealPacketSize + 1];
pstoreu(aux, result);
aux[RealPacketSize] = coeff(index + RealPacketSize - 1);
result = ploadu<PacketType>(aux + 1);
}
return result;
}
// The requested real packet segment forms the half-open interval [begin, end), where 'end' = 'begin' + 'count'.
// In order to access the underlying complex array, even indices must be aligned with the real components
// of the complex scalars. 'begin' and 'count' must be modified as follows:
// a) 'begin' must be rounded down to the nearest even number; and
// b) 'end' must be rounded up to the nearest even number.
template <int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
constexpr int RealPacketSize = unpacket_traits<PacketType>::size;
using ComplexPacket = typename find_packet_by_size<ComplexScalar, RealPacketSize / 2>::type;
EIGEN_STATIC_ASSERT((find_packet_by_size<ComplexScalar, RealPacketSize / 2>::value),
MISSING COMPATIBLE COMPLEX PACKET TYPE)
eigen_assert(((IsRowMajor ? col : row) % 2 == 0) && "the inner index must be even");
eigen_assert((begin % 2 == 0) && (count % 2 == 0) && "begin and count must be even");
Index crow = IsRowMajor ? row : row / 2;
Index ccol = IsRowMajor ? col / 2 : col;
Index cbegin = begin / 2;
Index ccount = count / 2;
ComplexPacket cpacket = BaseEvaluator::template packetSegment<LoadMode, ComplexPacket>(crow, ccol, cbegin, ccount);
return preinterpret<PacketType, ComplexPacket>(cpacket);
Index actualBegin = numext::round_down(begin, 2);
Index actualEnd = numext::round_down(begin + count + 1, 2);
Index actualCount = actualEnd - actualBegin;
Index r = IsRowMajor ? row : row / 2;
Index c = IsRowMajor ? col / 2 : col;
ComplexPacket cresult =
BaseEvaluator::template packetSegment<LoadMode, ComplexPacket>(r, c, actualBegin / 2, actualCount / 2);
PacketType result = preinterpret<PacketType>(cresult);
bool p = (IsRowMajor ? col : row) & 1;
if (p) {
Scalar aux[RealPacketSize + 1] = {};
pstoreu(aux, result);
Index lastr = IsRowMajor ? row : row + actualEnd - 1;
Index lastc = IsRowMajor ? col + actualEnd - 1 : col;
aux[actualEnd] = coeff(lastr, lastc);
result = ploadu<PacketType>(aux + 1);
}
return result;
}
template <int LoadMode, typename PacketType>
@@ -166,14 +202,20 @@ struct evaluator<RealView<Xpr>> : private evaluator<Xpr> {
using ComplexPacket = typename find_packet_by_size<ComplexScalar, RealPacketSize / 2>::type;
EIGEN_STATIC_ASSERT((find_packet_by_size<ComplexScalar, RealPacketSize / 2>::value),
MISSING COMPATIBLE COMPLEX PACKET TYPE)
eigen_assert((index % 2 == 0) && "the index must be even");
eigen_assert((begin % 2 == 0) && (count % 2 == 0) && "begin and count must be even");
Index cindex = index / 2;
Index cbegin = begin / 2;
Index ccount = count / 2;
ComplexPacket cpacket = BaseEvaluator::template packetSegment<LoadMode, ComplexPacket>(cindex, cbegin, ccount);
return preinterpret<PacketType, ComplexPacket>(cpacket);
Index actualBegin = numext::round_down(begin, 2);
Index actualEnd = numext::round_down(begin + count + 1, 2);
Index actualCount = actualEnd - actualBegin;
ComplexPacket cresult =
BaseEvaluator::template packetSegment<LoadMode, ComplexPacket>(index / 2, actualBegin / 2, actualCount / 2);
PacketType result = preinterpret<PacketType>(cresult);
bool p = index & 1;
if (p) {
Scalar aux[RealPacketSize + 1] = {};
pstoreu(aux, result);
aux[actualEnd] = coeff(index + actualEnd - 1);
result = ploadu<PacketType>(aux + 1);
}
return result;
}
};
@@ -211,7 +253,7 @@ class RealView : public internal::dense_xpr_base<RealView<Xpr>>::type {
EIGEN_DEVICE_FUNC RealView& operator=(const DenseBase<OtherDerived>& other);
protected:
friend struct internal::evaluator<RealView<Xpr>>;
friend struct internal::evaluator<RealView>;
Xpr& m_xpr;
};

View File

@@ -106,26 +106,6 @@ struct functor_traits<scalar_abs2_op<Scalar>> {
};
};
template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
struct squared_norm_functor {
typedef Scalar result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const {
return Scalar(numext::real(a) * numext::real(a), numext::imag(a) * numext::imag(a));
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const {
return Packet(pmul(a.v, a.v));
}
};
template <typename Scalar>
struct squared_norm_functor<Scalar, false> : scalar_abs2_op<Scalar> {};
template <typename Scalar>
struct functor_traits<squared_norm_functor<Scalar>> {
using Real = typename NumTraits<Scalar>::Real;
enum { Cost = NumTraits<Real>::MulCost, PacketAccess = packet_traits<Real>::HasMul };
};
/** \internal
* \brief Template functor to compute the conjugate of a complex value
*

View File

@@ -517,6 +517,9 @@ struct eigen_zero_impl;
template <typename Packet>
struct has_packet_segment : std::false_type {};
template <typename T>
struct complex_array_access;
} // namespace internal
} // end namespace Eigen