mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Revert accidental changes from !2212 squash merge
libeigen/eigen!2214 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
@@ -810,12 +810,10 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprTyp
|
||||
(dst.size() == 0 || (DstXprType::IsVectorAtCompileTime ? (dst.size() == src.size())
|
||||
: (dst.rows() == dstRows && dst.cols() == dstCols))) &&
|
||||
"Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
|
||||
// Allow resizing of default-constructed (empty) destinations.
|
||||
if (dst.size() == 0) dst.resize(dstRows, dstCols);
|
||||
#else
|
||||
dst.resize(dstRows, dstCols);
|
||||
#endif
|
||||
eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -685,8 +685,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
|
||||
eigen_assert((this->size() == 0 || (IsVectorAtCompileTime ? (this->size() == other.size())
|
||||
: (rows() == other.rows() && cols() == other.cols()))) &&
|
||||
"Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
|
||||
// Allow resizing of default-constructed (empty) destinations.
|
||||
if (this->size() == 0) resizeLike(other);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(other);
|
||||
#else
|
||||
resizeLike(other);
|
||||
#endif
|
||||
|
||||
@@ -27,23 +27,11 @@ struct complex_packet_wrapper {
|
||||
RealPacketT v;
|
||||
};
|
||||
|
||||
// --- Primary complex packet aliases ---
|
||||
constexpr int kComplexFloatSize = kFloatPacketSize / 2; // 2, 4, or 8
|
||||
constexpr int kComplexDoubleSize = kDoublePacketSize / 2; // 1, 2, or 4
|
||||
using PacketXcf = complex_packet_wrapper<float, kComplexFloatSize>;
|
||||
using PacketXcd = complex_packet_wrapper<double, kComplexDoubleSize>;
|
||||
|
||||
// Sub-packet types needed for reductions at larger sizes.
|
||||
// When PacketXcf IS already a given size, we skip the alias to avoid duplicates.
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32
|
||||
using Packet2cf = complex_packet_wrapper<float, 2>;
|
||||
#endif
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
using Packet8cf = complex_packet_wrapper<float, 8>;
|
||||
using Packet4cf = complex_packet_wrapper<float, 4>;
|
||||
#endif
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
using Packet2cf = complex_packet_wrapper<float, 2>;
|
||||
using Packet4cd = complex_packet_wrapper<double, 4>;
|
||||
using Packet2cd = complex_packet_wrapper<double, 2>;
|
||||
#endif
|
||||
|
||||
struct generic_complex_packet_traits : default_packet_traits {
|
||||
enum {
|
||||
@@ -70,39 +58,39 @@ struct generic_complex_packet_traits : default_packet_traits {
|
||||
|
||||
template <>
|
||||
struct packet_traits<std::complex<float>> : generic_complex_packet_traits {
|
||||
using type = PacketXcf;
|
||||
using half = PacketXcf;
|
||||
using type = Packet8cf;
|
||||
using half = Packet8cf;
|
||||
enum {
|
||||
size = kComplexFloatSize,
|
||||
size = 8,
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<PacketXcf> : generic_unpacket_traits {
|
||||
struct unpacket_traits<Packet8cf> : generic_unpacket_traits {
|
||||
using type = std::complex<float>;
|
||||
using half = PacketXcf;
|
||||
using as_real = PacketXf;
|
||||
using half = Packet8cf;
|
||||
using as_real = Packet16f;
|
||||
enum {
|
||||
size = kComplexFloatSize,
|
||||
size = 8,
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<std::complex<double>> : generic_complex_packet_traits {
|
||||
using type = PacketXcd;
|
||||
using half = PacketXcd;
|
||||
using type = Packet4cd;
|
||||
using half = Packet4cd;
|
||||
enum {
|
||||
size = kComplexDoubleSize,
|
||||
size = 4,
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<PacketXcd> : generic_unpacket_traits {
|
||||
struct unpacket_traits<Packet4cd> : generic_unpacket_traits {
|
||||
using type = std::complex<double>;
|
||||
using half = PacketXcd;
|
||||
using as_real = PacketXd;
|
||||
using half = Packet4cd;
|
||||
using as_real = Packet8d;
|
||||
enum {
|
||||
size = kComplexDoubleSize,
|
||||
size = 4,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -127,58 +115,24 @@ struct unpacket_traits<PacketXcd> : generic_unpacket_traits {
|
||||
pstore(&numext::real_ref(*to), from.v); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_COMPLEX_LOAD_STORE(PacketXcf);
|
||||
EIGEN_CLANG_COMPLEX_LOAD_STORE(PacketXcd);
|
||||
EIGEN_CLANG_COMPLEX_LOAD_STORE(Packet8cf);
|
||||
EIGEN_CLANG_COMPLEX_LOAD_STORE(Packet4cd);
|
||||
#undef EIGEN_CLANG_COMPLEX_LOAD_STORE
|
||||
|
||||
// --- pset1 for complex ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pset1<PacketXcf>(const std::complex<float>& from) {
|
||||
EIGEN_STRONG_INLINE Packet8cf pset1<Packet8cf>(const std::complex<float>& from) {
|
||||
const float re = numext::real(from);
|
||||
const float im = numext::imag(from);
|
||||
return PacketXcf(PacketXf{re, im, re, im});
|
||||
return Packet8cf(Packet16f{re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im});
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pset1<PacketXcd>(const std::complex<double>& from) {
|
||||
EIGEN_STRONG_INLINE Packet4cd pset1<Packet4cd>(const std::complex<double>& from) {
|
||||
const double re = numext::real(from);
|
||||
const double im = numext::imag(from);
|
||||
return PacketXcd(PacketXd{re, im});
|
||||
return Packet4cd(Packet8d{re, im, re, im, re, im, re, im});
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pset1<PacketXcf>(const std::complex<float>& from) {
|
||||
const float re = numext::real(from);
|
||||
const float im = numext::imag(from);
|
||||
return PacketXcf(PacketXf{re, im, re, im, re, im, re, im});
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pset1<PacketXcd>(const std::complex<double>& from) {
|
||||
const double re = numext::real(from);
|
||||
const double im = numext::imag(from);
|
||||
return PacketXcd(PacketXd{re, im, re, im});
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pset1<PacketXcf>(const std::complex<float>& from) {
|
||||
const float re = numext::real(from);
|
||||
const float im = numext::imag(from);
|
||||
return PacketXcf(PacketXf{re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im});
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pset1<PacketXcd>(const std::complex<double>& from) {
|
||||
const double re = numext::real(from);
|
||||
const double im = numext::imag(from);
|
||||
return PacketXcd(PacketXd{re, im, re, im, re, im, re, im});
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// ----------- Unary ops ------------------
|
||||
#define DELEGATE_UNARY_TO_REAL_OP(PACKET_TYPE, OP) \
|
||||
template <> \
|
||||
@@ -195,348 +149,134 @@ EIGEN_STRONG_INLINE PacketXcd pset1<PacketXcd>(const std::complex<double>& from)
|
||||
} \
|
||||
EIGEN_INSTANTIATE_COMPLEX_MATH_FUNCS(PACKET_TYPE)
|
||||
|
||||
EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(PacketXcf);
|
||||
EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(PacketXcd);
|
||||
|
||||
// --- pconj ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(Packet8cf);
|
||||
EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(Packet4cd);
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pconj<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7));
|
||||
EIGEN_STRONG_INLINE Packet8cf pconj<Packet8cf>(const Packet8cf& a) {
|
||||
return Packet8cf(__builtin_shufflevector(a.v, -a.v, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pconj<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, -a.v, 0, 3));
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pconj<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pconj<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7));
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pconj<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, -a.v, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pconj<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15));
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// Sub-packet pconj specializations needed for reductions.
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pconj<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7));
|
||||
}
|
||||
#endif
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pconj<Packet4cf>(const Packet4cf& a) {
|
||||
return Packet4cf(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pconj<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd pconj<Packet4cd>(const Packet4cd& a) {
|
||||
return Packet4cd(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pconj<Packet2cd>(const Packet2cd& a) {
|
||||
return Packet2cd(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7));
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef DELEGATE_UNARY_TO_REAL_OP
|
||||
#undef EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS
|
||||
|
||||
// Flip real and imaginary parts, i.e. {re(a), im(a)} -> {im(a), re(a)}.
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pcplxflip<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2));
|
||||
EIGEN_STRONG_INLINE Packet8cf pcplxflip<Packet8cf>(const Packet8cf& a) {
|
||||
return Packet8cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pcplxflip<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 0));
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pcplxflip<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pcplxflip<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2));
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pcplxflip<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pcplxflip<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6));
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// Sub-packet pcplxflip specializations needed for reductions.
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2));
|
||||
}
|
||||
#endif
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& a) {
|
||||
return Packet4cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd pcplxflip<Packet4cd>(const Packet4cd& a) {
|
||||
return Packet4cd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& a) {
|
||||
return Packet2cd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Copy real to imaginary part, i.e. {re(a), im(a)} -> {re(a), re(a)}.
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pdupreal<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2));
|
||||
EIGEN_STRONG_INLINE Packet8cf pdupreal<Packet8cf>(const Packet8cf& a) {
|
||||
return Packet8cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pdupreal<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 0, 0));
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pdupreal<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pdupreal<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2));
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pdupreal<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pdupreal<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6));
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// Sub-packet pdupreal specializations needed for reductions.
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pdupreal<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2));
|
||||
}
|
||||
#endif
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pdupreal<Packet4cf>(const Packet4cf& a) {
|
||||
return Packet4cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pdupreal<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd pdupreal<Packet4cd>(const Packet4cd& a) {
|
||||
return Packet4cd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pdupreal<Packet2cd>(const Packet2cd& a) {
|
||||
return Packet2cd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Copy imaginary to real part, i.e. {re(a), im(a)} -> {im(a), im(a)}.
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pdupimag<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3));
|
||||
EIGEN_STRONG_INLINE Packet8cf pdupimag<Packet8cf>(const Packet8cf& a) {
|
||||
return Packet8cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pdupimag<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 1));
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pdupimag<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pdupimag<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3));
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pdupimag<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd pdupimag<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7));
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// Sub-packet pdupimag specializations needed for reductions.
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pdupimag<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3));
|
||||
}
|
||||
#endif
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pdupimag<Packet4cf>(const Packet4cf& a) {
|
||||
return Packet4cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pdupimag<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd pdupimag<Packet4cd>(const Packet4cd& a) {
|
||||
return Packet4cd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pdupimag<Packet2cd>(const Packet2cd& a) {
|
||||
return Packet2cd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3));
|
||||
}
|
||||
#endif
|
||||
|
||||
// --- ploaddup ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf ploaddup<PacketXcf>(const std::complex<float>* from) {
|
||||
return pset1<PacketXcf>(*from);
|
||||
EIGEN_STRONG_INLINE Packet8cf ploaddup<Packet8cf>(const std::complex<float>* from) {
|
||||
return Packet8cf(Packet16f{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]),
|
||||
std::real(from[2]), std::imag(from[2]), std::real(from[2]), std::imag(from[2]),
|
||||
std::real(from[3]), std::imag(from[3]), std::real(from[3]), std::imag(from[3])});
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd ploaddup<PacketXcd>(const std::complex<double>* from) {
|
||||
return pset1<PacketXcd>(*from);
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf ploaddup<PacketXcf>(const std::complex<float>* from) {
|
||||
return PacketXcf(PacketXf{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])});
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd ploaddup<PacketXcd>(const std::complex<double>* from) {
|
||||
return pset1<PacketXcd>(*from);
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf ploaddup<PacketXcf>(const std::complex<float>* from) {
|
||||
return PacketXcf(PacketXf{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]),
|
||||
std::real(from[2]), std::imag(from[2]), std::real(from[2]), std::imag(from[2]),
|
||||
std::real(from[3]), std::imag(from[3]), std::real(from[3]), std::imag(from[3])});
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd ploaddup<PacketXcd>(const std::complex<double>* from) {
|
||||
return PacketXcd(PacketXd{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
EIGEN_STRONG_INLINE Packet4cd ploaddup<Packet4cd>(const std::complex<double>* from) {
|
||||
return Packet4cd(Packet8d{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])});
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// --- ploadquad ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf ploadquad<PacketXcf>(const std::complex<float>* from) {
|
||||
return pset1<PacketXcf>(*from);
|
||||
EIGEN_STRONG_INLINE Packet8cf ploadquad<Packet8cf>(const std::complex<float>* from) {
|
||||
return Packet8cf(Packet16f{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])});
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd ploadquad<PacketXcd>(const std::complex<double>* from) {
|
||||
return pset1<PacketXcd>(*from);
|
||||
EIGEN_STRONG_INLINE Packet4cd ploadquad<Packet4cd>(const std::complex<double>* from) {
|
||||
return pset1<Packet4cd>(*from);
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf ploadquad<PacketXcf>(const std::complex<float>* from) {
|
||||
return pset1<PacketXcf>(*from);
|
||||
EIGEN_STRONG_INLINE Packet8cf preverse<Packet8cf>(const Packet8cf& a) {
|
||||
return Packet8cf(reinterpret_cast<Packet16f>(preverse(reinterpret_cast<Packet8d>(a.v))));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd ploadquad<PacketXcd>(const std::complex<double>* from) {
|
||||
return pset1<PacketXcd>(*from);
|
||||
EIGEN_STRONG_INLINE Packet4cd preverse<Packet4cd>(const Packet4cd& a) {
|
||||
return Packet4cd(__builtin_shufflevector(a.v, a.v, 6, 7, 4, 5, 2, 3, 0, 1));
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf ploadquad<PacketXcf>(const std::complex<float>* from) {
|
||||
return PacketXcf(PacketXf{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])});
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd ploadquad<PacketXcd>(const std::complex<double>* from) {
|
||||
return pset1<PacketXcd>(*from);
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// --- preverse ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf preverse<PacketXcf>(const PacketXcf& a) {
|
||||
// 2 complex floats: swap pairs (0,1) and (2,3)
|
||||
return PacketXcf(__builtin_shufflevector(a.v, a.v, 2, 3, 0, 1));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd preverse<PacketXcd>(const PacketXcd& a) {
|
||||
// 1 complex double: identity
|
||||
return a;
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf preverse<PacketXcf>(const PacketXcf& a) {
|
||||
// 4 complex floats: reverse pairs
|
||||
return PacketXcf(reinterpret_cast<PacketXf>(preverse(reinterpret_cast<PacketXd>(a.v))));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd preverse<PacketXcd>(const PacketXcd& a) {
|
||||
// 2 complex doubles: swap pairs
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 2, 3, 0, 1));
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf preverse<PacketXcf>(const PacketXcf& a) {
|
||||
return PacketXcf(reinterpret_cast<PacketXf>(preverse(reinterpret_cast<PacketXd>(a.v))));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcd preverse<PacketXcd>(const PacketXcd& a) {
|
||||
return PacketXcd(__builtin_shufflevector(a.v, a.v, 6, 7, 4, 5, 2, 3, 0, 1));
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// ----------- Binary ops ------------------
|
||||
#define DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, OP) \
|
||||
template <> \
|
||||
@@ -560,8 +300,8 @@ EIGEN_STRONG_INLINE PacketXcd preverse<PacketXcd>(const PacketXcd& a) {
|
||||
return PACKET_TYPE(pand(pdupreal(t).v, pdupimag(t).v)); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(PacketXcf);
|
||||
EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(PacketXcd);
|
||||
EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(Packet8cf);
|
||||
EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(Packet4cd);
|
||||
|
||||
// Binary ops that are needed on sub-packets for predux and predux_mul.
|
||||
#define EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(PACKET_TYPE) \
|
||||
@@ -571,17 +311,11 @@ EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(PacketXcd);
|
||||
return pmul_complex(a, b); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(PacketXcf);
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cf);
|
||||
#endif
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet8cf);
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet4cf);
|
||||
#endif
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(PacketXcd);
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cf);
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet4cd);
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cd);
|
||||
#endif
|
||||
|
||||
#define EIGEN_CLANG_PACKET_SCATTER_GATHER(PACKET_TYPE) \
|
||||
template <> \
|
||||
@@ -604,8 +338,8 @@ EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cd);
|
||||
return result; \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXcf);
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXcd);
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8cf);
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet4cd);
|
||||
#undef EIGEN_CLANG_PACKET_SCATTER_GATHER
|
||||
|
||||
#undef DELEGATE_BINARY_TO_REAL_OP
|
||||
@@ -614,89 +348,46 @@ EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXcd);
|
||||
|
||||
// ------------ ternary ops -------------
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXcf pselect<PacketXcf>(const PacketXcf& mask, const PacketXcf& a, const PacketXcf& b) {
|
||||
return PacketXcf(reinterpret_cast<PacketXf>(
|
||||
pselect(reinterpret_cast<PacketXd>(mask.v), reinterpret_cast<PacketXd>(a.v), reinterpret_cast<PacketXd>(b.v))));
|
||||
EIGEN_STRONG_INLINE Packet8cf pselect<Packet8cf>(const Packet8cf& mask, const Packet8cf& a, const Packet8cf& b) {
|
||||
return Packet8cf(reinterpret_cast<Packet16f>(
|
||||
pselect(reinterpret_cast<Packet8d>(mask.v), reinterpret_cast<Packet8d>(a.v), reinterpret_cast<Packet8d>(b.v))));
|
||||
}
|
||||
|
||||
// --- zip_in_place for complex ---
|
||||
namespace detail {
|
||||
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXcf>(PacketXcf& p1, PacketXcf& p2) {
|
||||
PacketXf tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 4, 5);
|
||||
p2.v = __builtin_shufflevector(p1.v, p2.v, 2, 3, 6, 7);
|
||||
p1.v = tmp;
|
||||
}
|
||||
// PacketXcd at 16 bytes has 1 element, no zip_in_place needed.
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXcf>(PacketXcf& p1, PacketXcf& p2) {
|
||||
PacketXf tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 8, 9, 2, 3, 10, 11);
|
||||
p2.v = __builtin_shufflevector(p1.v, p2.v, 4, 5, 12, 13, 6, 7, 14, 15);
|
||||
p1.v = tmp;
|
||||
}
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXcd>(PacketXcd& p1, PacketXcd& p2) {
|
||||
PacketXd tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 4, 5);
|
||||
p2.v = __builtin_shufflevector(p1.v, p2.v, 2, 3, 6, 7);
|
||||
p1.v = tmp;
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXcf>(PacketXcf& p1, PacketXcf& p2) {
|
||||
PacketXf tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23);
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<Packet8cf>(Packet8cf& p1, Packet8cf& p2) {
|
||||
Packet16f tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23);
|
||||
p2.v = __builtin_shufflevector(p1.v, p2.v, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31);
|
||||
p1.v = tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXcd>(PacketXcd& p1, PacketXcd& p2) {
|
||||
PacketXd tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 8, 9, 2, 3, 10, 11);
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<Packet4cd>(Packet4cd& p1, Packet4cd& p2) {
|
||||
Packet8d tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 8, 9, 2, 3, 10, 11);
|
||||
p2.v = __builtin_shufflevector(p1.v, p2.v, 4, 5, 12, 13, 6, 7, 14, 15);
|
||||
p1.v = tmp;
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
} // namespace detail
|
||||
|
||||
// --- ptranspose for complex ---
|
||||
// PacketXcf: valid block sizes depend on kComplexFloatSize.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXcf, 2>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8cf, 8>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXcf, 4>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8cf, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
#endif
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXcf, 8>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8cf, 2>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
#endif
|
||||
|
||||
// PacketXcd: valid block sizes depend on kComplexDoubleSize.
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXcd, 2>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4cd, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
#endif
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXcd, 4>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4cd, 2>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PacketXcf, PacketXf)
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PacketXcd, PacketXd)
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet8cf, Packet16f)
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cd, Packet8d)
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -18,27 +18,27 @@ namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf pfrexp<PacketXf>(const PacketXf& a, PacketXf& exponent) {
|
||||
EIGEN_STRONG_INLINE Packet16f pfrexp<Packet16f>(const Packet16f& a, Packet16f& exponent) {
|
||||
return pfrexp_generic(a, exponent);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd pfrexp<PacketXd>(const PacketXd& a, PacketXd& exponent) {
|
||||
EIGEN_STRONG_INLINE Packet8d pfrexp<Packet8d>(const Packet8d& a, Packet8d& exponent) {
|
||||
return pfrexp_generic(a, exponent);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf pldexp<PacketXf>(const PacketXf& a, const PacketXf& exponent) {
|
||||
EIGEN_STRONG_INLINE Packet16f pldexp<Packet16f>(const Packet16f& a, const Packet16f& exponent) {
|
||||
return pldexp_generic(a, exponent);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd pldexp<PacketXd>(const PacketXd& a, const PacketXd& exponent) {
|
||||
EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, const Packet8d& exponent) {
|
||||
return pldexp_generic(a, exponent);
|
||||
}
|
||||
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PacketXf)
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PacketXd)
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet16f)
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet8d)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@@ -24,32 +24,14 @@ template <typename ScalarT, int n>
|
||||
using VectorType = ScalarT __attribute__((ext_vector_type(n), aligned(n * sizeof(ScalarT))));
|
||||
} // namespace detail
|
||||
|
||||
// --- Naming Convention ---
|
||||
// This backend uses size-independent type aliases so the same code works
|
||||
// for EIGEN_GENERIC_VECTOR_SIZE_BYTES in {16, 32, 64}:
|
||||
//
|
||||
// PacketXf - float vector (4, 8, or 16 elements)
|
||||
// PacketXd - double vector (2, 4, or 8 elements)
|
||||
// PacketXi - int32_t vector (4, 8, or 16 elements)
|
||||
// PacketXl - int64_t vector (2, 4, or 8 elements)
|
||||
// PacketXcf - complex<float> vector (2, 4, or 8 elements) [in Complex.h]
|
||||
// PacketXcd - complex<double> vector (1, 2, or 4 elements) [in Complex.h]
|
||||
//
|
||||
// The "X" suffix indicates the element count is determined by the macro
|
||||
// EIGEN_GENERIC_VECTOR_SIZE_BYTES at compile time. Operations that require
|
||||
// compile-time constant indices (e.g. __builtin_shufflevector) use
|
||||
// #if EIGEN_GENERIC_VECTOR_SIZE_BYTES == ... blocks.
|
||||
// --- Primary packet type definitions (fixed at 64 bytes) ---
|
||||
|
||||
static_assert(EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 || EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 ||
|
||||
EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64,
|
||||
"EIGEN_GENERIC_VECTOR_SIZE_BYTES must be 16, 32, or 64");
|
||||
|
||||
constexpr int kFloatPacketSize = EIGEN_GENERIC_VECTOR_SIZE_BYTES / sizeof(float);
|
||||
constexpr int kDoublePacketSize = EIGEN_GENERIC_VECTOR_SIZE_BYTES / sizeof(double);
|
||||
using PacketXf = detail::VectorType<float, kFloatPacketSize>;
|
||||
using PacketXd = detail::VectorType<double, kDoublePacketSize>;
|
||||
using PacketXi = detail::VectorType<int32_t, kFloatPacketSize>;
|
||||
using PacketXl = detail::VectorType<int64_t, kDoublePacketSize>;
|
||||
// TODO(rmlarsen): Generalize to other vector sizes.
|
||||
static_assert(EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64, "We currently assume the full vector size is 64 bytes");
|
||||
using Packet16f = detail::VectorType<float, 16>;
|
||||
using Packet8d = detail::VectorType<double, 8>;
|
||||
using Packet16i = detail::VectorType<int32_t, 16>;
|
||||
using Packet8l = detail::VectorType<int64_t, 8>;
|
||||
|
||||
// --- packet_traits specializations ---
|
||||
struct generic_float_packet_traits : default_packet_traits {
|
||||
@@ -100,20 +82,20 @@ struct generic_float_packet_traits : default_packet_traits {
|
||||
|
||||
template <>
|
||||
struct packet_traits<float> : generic_float_packet_traits {
|
||||
using type = PacketXf;
|
||||
using half = PacketXf;
|
||||
using type = Packet16f;
|
||||
using half = Packet16f;
|
||||
enum {
|
||||
size = kFloatPacketSize,
|
||||
size = 16,
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<double> : generic_float_packet_traits {
|
||||
using type = PacketXd;
|
||||
using half = PacketXd;
|
||||
using type = Packet8d;
|
||||
using half = Packet8d;
|
||||
// Generic double-precision acos/asin are not yet implemented in
|
||||
// GenericPacketMathFunctions.h (only float versions exist).
|
||||
enum { size = kDoublePacketSize, HasACos = 0, HasASin = 0 };
|
||||
enum { size = 8, HasACos = 0, HasASin = 0 };
|
||||
};
|
||||
|
||||
struct generic_integer_packet_traits : default_packet_traits {
|
||||
@@ -149,19 +131,19 @@ struct generic_integer_packet_traits : default_packet_traits {
|
||||
|
||||
template <>
|
||||
struct packet_traits<int32_t> : generic_integer_packet_traits {
|
||||
using type = PacketXi;
|
||||
using half = PacketXi;
|
||||
using type = Packet16i;
|
||||
using half = Packet16i;
|
||||
enum {
|
||||
size = kFloatPacketSize,
|
||||
size = 16,
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<int64_t> : generic_integer_packet_traits {
|
||||
using type = PacketXl;
|
||||
using half = PacketXl;
|
||||
using type = Packet8l;
|
||||
using half = Packet8l;
|
||||
enum {
|
||||
size = kDoublePacketSize,
|
||||
size = 8,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -174,37 +156,37 @@ struct generic_unpacket_traits : default_unpacket_traits {
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<PacketXf> : generic_unpacket_traits {
|
||||
struct unpacket_traits<Packet16f> : generic_unpacket_traits {
|
||||
using type = float;
|
||||
using half = PacketXf;
|
||||
using integer_packet = PacketXi;
|
||||
using half = Packet16f;
|
||||
using integer_packet = Packet16i;
|
||||
enum {
|
||||
size = kFloatPacketSize,
|
||||
size = 16,
|
||||
};
|
||||
};
|
||||
template <>
|
||||
struct unpacket_traits<PacketXd> : generic_unpacket_traits {
|
||||
struct unpacket_traits<Packet8d> : generic_unpacket_traits {
|
||||
using type = double;
|
||||
using half = PacketXd;
|
||||
using integer_packet = PacketXl;
|
||||
using half = Packet8d;
|
||||
using integer_packet = Packet8l;
|
||||
enum {
|
||||
size = kDoublePacketSize,
|
||||
size = 8,
|
||||
};
|
||||
};
|
||||
template <>
|
||||
struct unpacket_traits<PacketXi> : generic_unpacket_traits {
|
||||
struct unpacket_traits<Packet16i> : generic_unpacket_traits {
|
||||
using type = int32_t;
|
||||
using half = PacketXi;
|
||||
using half = Packet16i;
|
||||
enum {
|
||||
size = kFloatPacketSize,
|
||||
size = 16,
|
||||
};
|
||||
};
|
||||
template <>
|
||||
struct unpacket_traits<PacketXl> : generic_unpacket_traits {
|
||||
struct unpacket_traits<Packet8l> : generic_unpacket_traits {
|
||||
using type = int64_t;
|
||||
using half = PacketXl;
|
||||
using half = Packet8l;
|
||||
enum {
|
||||
size = kDoublePacketSize,
|
||||
size = 8,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -283,21 +265,21 @@ EIGEN_STRONG_INLINE void store_vector_aligned(scalar_type_of_vector_t<VectorT>*
|
||||
detail::store_vector_aligned<PACKET_TYPE>(to, from); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PacketXf)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PacketXd)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PacketXi)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PacketXl)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16f)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8d)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16i)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8l)
|
||||
#undef EIGEN_CLANG_PACKET_LOAD_STORE_PACKET
|
||||
|
||||
// --- Broadcast operation ---
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf pset1frombits<PacketXf>(uint32_t from) {
|
||||
return PacketXf(numext::bit_cast<float>(from));
|
||||
EIGEN_STRONG_INLINE Packet16f pset1frombits<Packet16f>(uint32_t from) {
|
||||
return Packet16f(numext::bit_cast<float>(from));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd pset1frombits<PacketXd>(uint64_t from) {
|
||||
return PacketXd(numext::bit_cast<double>(from));
|
||||
EIGEN_STRONG_INLINE Packet8d pset1frombits<Packet8d>(uint64_t from) {
|
||||
return Packet8d(numext::bit_cast<double>(from));
|
||||
}
|
||||
|
||||
#define EIGEN_CLANG_PACKET_SET1(PACKET_TYPE) \
|
||||
@@ -310,10 +292,10 @@ EIGEN_STRONG_INLINE PacketXd pset1frombits<PacketXd>(uint64_t from) {
|
||||
return from[0]; \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_SET1(PacketXf)
|
||||
EIGEN_CLANG_PACKET_SET1(PacketXd)
|
||||
EIGEN_CLANG_PACKET_SET1(PacketXi)
|
||||
EIGEN_CLANG_PACKET_SET1(PacketXl)
|
||||
EIGEN_CLANG_PACKET_SET1(Packet16f)
|
||||
EIGEN_CLANG_PACKET_SET1(Packet8d)
|
||||
EIGEN_CLANG_PACKET_SET1(Packet16i)
|
||||
EIGEN_CLANG_PACKET_SET1(Packet8l)
|
||||
#undef EIGEN_CLANG_PACKET_SET1
|
||||
|
||||
// --- Arithmetic operations ---
|
||||
@@ -327,10 +309,10 @@ EIGEN_CLANG_PACKET_SET1(PacketXl)
|
||||
return -a; \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_ARITHMETIC(PacketXf)
|
||||
EIGEN_CLANG_PACKET_ARITHMETIC(PacketXd)
|
||||
EIGEN_CLANG_PACKET_ARITHMETIC(PacketXi)
|
||||
EIGEN_CLANG_PACKET_ARITHMETIC(PacketXl)
|
||||
EIGEN_CLANG_PACKET_ARITHMETIC(Packet16f)
|
||||
EIGEN_CLANG_PACKET_ARITHMETIC(Packet8d)
|
||||
EIGEN_CLANG_PACKET_ARITHMETIC(Packet16i)
|
||||
EIGEN_CLANG_PACKET_ARITHMETIC(Packet8l)
|
||||
#undef EIGEN_CLANG_PACKET_ARITHMETIC
|
||||
|
||||
// --- Bitwise operations (via casting) ---
|
||||
@@ -339,10 +321,10 @@ namespace detail {
|
||||
|
||||
// Reinterpret-cast helpers, equivalent to preinterpret<> but defined here
|
||||
// because PacketMath.h is included before TypeCasting.h.
|
||||
EIGEN_STRONG_INLINE PacketXi preinterpret_float_to_int(const PacketXf& a) { return reinterpret_cast<PacketXi>(a); }
|
||||
EIGEN_STRONG_INLINE PacketXf preinterpret_int_to_float(const PacketXi& a) { return reinterpret_cast<PacketXf>(a); }
|
||||
EIGEN_STRONG_INLINE PacketXl preinterpret_double_to_long(const PacketXd& a) { return reinterpret_cast<PacketXl>(a); }
|
||||
EIGEN_STRONG_INLINE PacketXd preinterpret_long_to_double(const PacketXl& a) { return reinterpret_cast<PacketXd>(a); }
|
||||
EIGEN_STRONG_INLINE Packet16i preinterpret_float_to_int(const Packet16f& a) { return reinterpret_cast<Packet16i>(a); }
|
||||
EIGEN_STRONG_INLINE Packet16f preinterpret_int_to_float(const Packet16i& a) { return reinterpret_cast<Packet16f>(a); }
|
||||
EIGEN_STRONG_INLINE Packet8l preinterpret_double_to_long(const Packet8d& a) { return reinterpret_cast<Packet8l>(a); }
|
||||
EIGEN_STRONG_INLINE Packet8d preinterpret_long_to_double(const Packet8l& a) { return reinterpret_cast<Packet8d>(a); }
|
||||
|
||||
} // namespace detail
|
||||
|
||||
@@ -386,8 +368,8 @@ EIGEN_STRONG_INLINE PacketXd preinterpret_long_to_double(const PacketXl& a) { re
|
||||
return a << N; \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_BITWISE_INT(PacketXi)
|
||||
EIGEN_CLANG_PACKET_BITWISE_INT(PacketXl)
|
||||
EIGEN_CLANG_PACKET_BITWISE_INT(Packet16i)
|
||||
EIGEN_CLANG_PACKET_BITWISE_INT(Packet8l)
|
||||
#undef EIGEN_CLANG_PACKET_BITWISE_INT
|
||||
|
||||
// Bitwise ops for floating point packets
|
||||
@@ -419,8 +401,8 @@ EIGEN_CLANG_PACKET_BITWISE_INT(PacketXl)
|
||||
return CAST_FROM_INT(CAST_TO_INT(a) & ~CAST_TO_INT(b)); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_BITWISE_FLOAT(PacketXf, detail::preinterpret_float_to_int, detail::preinterpret_int_to_float)
|
||||
EIGEN_CLANG_PACKET_BITWISE_FLOAT(PacketXd, detail::preinterpret_double_to_long, detail::preinterpret_long_to_double)
|
||||
EIGEN_CLANG_PACKET_BITWISE_FLOAT(Packet16f, detail::preinterpret_float_to_int, detail::preinterpret_int_to_float)
|
||||
EIGEN_CLANG_PACKET_BITWISE_FLOAT(Packet8d, detail::preinterpret_double_to_long, detail::preinterpret_long_to_double)
|
||||
#undef EIGEN_CLANG_PACKET_BITWISE_FLOAT
|
||||
|
||||
// --- Comparison operations ---
|
||||
@@ -446,8 +428,8 @@ EIGEN_CLANG_PACKET_BITWISE_FLOAT(PacketXd, detail::preinterpret_double_to_long,
|
||||
return numext::bit_cast<PACKET_TYPE>(INT_PACKET_TYPE(!(a >= b))); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_CMP(PacketXf, PacketXi)
|
||||
EIGEN_CLANG_PACKET_CMP(PacketXd, PacketXl)
|
||||
EIGEN_CLANG_PACKET_CMP(Packet16f, Packet16i)
|
||||
EIGEN_CLANG_PACKET_CMP(Packet8d, Packet8l)
|
||||
#undef EIGEN_CLANG_PACKET_CMP
|
||||
|
||||
// --- Min/Max operations ---
|
||||
@@ -490,10 +472,10 @@ EIGEN_CLANG_PACKET_CMP(PacketXd, PacketXl)
|
||||
return mask != 0 ? a : b; \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_ELEMENTWISE(PacketXf)
|
||||
EIGEN_CLANG_PACKET_ELEMENTWISE(PacketXd)
|
||||
EIGEN_CLANG_PACKET_ELEMENTWISE(PacketXi)
|
||||
EIGEN_CLANG_PACKET_ELEMENTWISE(PacketXl)
|
||||
EIGEN_CLANG_PACKET_ELEMENTWISE(Packet16f)
|
||||
EIGEN_CLANG_PACKET_ELEMENTWISE(Packet8d)
|
||||
EIGEN_CLANG_PACKET_ELEMENTWISE(Packet16i)
|
||||
EIGEN_CLANG_PACKET_ELEMENTWISE(Packet8l)
|
||||
#undef EIGEN_CLANG_PACKET_ELEMENTWISE
|
||||
#endif
|
||||
|
||||
@@ -528,8 +510,8 @@ EIGEN_CLANG_PACKET_ELEMENTWISE(PacketXl)
|
||||
return __builtin_elementwise_sqrt(a); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_MATH_FLOAT(PacketXf)
|
||||
EIGEN_CLANG_PACKET_MATH_FLOAT(PacketXd)
|
||||
EIGEN_CLANG_PACKET_MATH_FLOAT(Packet16f)
|
||||
EIGEN_CLANG_PACKET_MATH_FLOAT(Packet8d)
|
||||
#undef EIGEN_CLANG_PACKET_MATH_FLOAT
|
||||
#endif
|
||||
|
||||
@@ -581,8 +563,8 @@ EIGEN_CLANG_PACKET_MATH_FLOAT(PacketXd)
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_CLANG_PACKET_MADD(PacketXf)
|
||||
EIGEN_CLANG_PACKET_MADD(PacketXd)
|
||||
EIGEN_CLANG_PACKET_MADD(Packet16f)
|
||||
EIGEN_CLANG_PACKET_MADD(Packet8d)
|
||||
#undef EIGEN_CLANG_PACKET_MADD
|
||||
|
||||
#define EIGEN_CLANG_PACKET_SCATTER_GATHER(PACKET_TYPE) \
|
||||
@@ -604,10 +586,10 @@ EIGEN_CLANG_PACKET_MADD(PacketXd)
|
||||
return result; \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXf)
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXd)
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXi)
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXl)
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet16f)
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8d)
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet16i)
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8l)
|
||||
|
||||
#undef EIGEN_CLANG_PACKET_SCATTER_GATHER
|
||||
|
||||
@@ -615,14 +597,6 @@ EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXl)
|
||||
#if EIGEN_HAS_BUILTIN(__builtin_shufflevector)
|
||||
namespace detail {
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet preverse_impl_2(const Packet& a) {
|
||||
return __builtin_shufflevector(a, a, 1, 0);
|
||||
}
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet preverse_impl_4(const Packet& a) {
|
||||
return __builtin_shufflevector(a, a, 3, 2, 1, 0);
|
||||
}
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet preverse_impl_8(const Packet& a) {
|
||||
return __builtin_shufflevector(a, a, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
}
|
||||
@@ -632,81 +606,33 @@ EIGEN_STRONG_INLINE Packet preverse_impl_16(const Packet& a) {
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
#define EIGEN_CLANG_PACKET_REVERSE(PACKET_TYPE, SIZE) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE preverse<PACKET_TYPE>(const PACKET_TYPE& a) { \
|
||||
return detail::preverse_impl_##SIZE(a); \
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf preverse<PacketXf>(const PacketXf& a) {
|
||||
return detail::preverse_impl_4(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd preverse<PacketXd>(const PacketXd& a) {
|
||||
return detail::preverse_impl_2(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi preverse<PacketXi>(const PacketXi& a) {
|
||||
return detail::preverse_impl_4(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl preverse<PacketXl>(const PacketXl& a) {
|
||||
return detail::preverse_impl_2(a);
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf preverse<PacketXf>(const PacketXf& a) {
|
||||
return detail::preverse_impl_8(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd preverse<PacketXd>(const PacketXd& a) {
|
||||
return detail::preverse_impl_4(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi preverse<PacketXi>(const PacketXi& a) {
|
||||
return detail::preverse_impl_8(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl preverse<PacketXl>(const PacketXl& a) {
|
||||
return detail::preverse_impl_4(a);
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf preverse<PacketXf>(const PacketXf& a) {
|
||||
return detail::preverse_impl_16(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd preverse<PacketXd>(const PacketXd& a) {
|
||||
return detail::preverse_impl_8(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi preverse<PacketXi>(const PacketXi& a) {
|
||||
return detail::preverse_impl_16(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl preverse<PacketXl>(const PacketXl& a) {
|
||||
return detail::preverse_impl_8(a);
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
EIGEN_CLANG_PACKET_REVERSE(Packet16f, 16)
|
||||
EIGEN_CLANG_PACKET_REVERSE(Packet8d, 8)
|
||||
EIGEN_CLANG_PACKET_REVERSE(Packet16i, 16)
|
||||
EIGEN_CLANG_PACKET_REVERSE(Packet8l, 8)
|
||||
#undef EIGEN_CLANG_PACKET_REVERSE
|
||||
|
||||
namespace detail {
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet ploaddup2(const typename unpacket_traits<Packet>::type* from) {
|
||||
EIGEN_STRONG_INLINE Packet ploaddup16(const typename unpacket_traits<Packet>::type* from) {
|
||||
static_assert((unpacket_traits<Packet>::size) % 2 == 0, "Packet size must be a multiple of 2");
|
||||
using HalfPacket = HalfPacket<Packet>;
|
||||
HalfPacket a = load_vector_unaligned<HalfPacket>(from);
|
||||
return __builtin_shufflevector(a, a, 0, 0);
|
||||
return __builtin_shufflevector(a, a, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7);
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet ploaddup4(const typename unpacket_traits<Packet>::type* from) {
|
||||
static_assert((unpacket_traits<Packet>::size) % 2 == 0, "Packet size must be a multiple of 2");
|
||||
using HalfPacket = HalfPacket<Packet>;
|
||||
HalfPacket a = load_vector_unaligned<HalfPacket>(from);
|
||||
return __builtin_shufflevector(a, a, 0, 0, 1, 1);
|
||||
EIGEN_STRONG_INLINE Packet ploadquad16(const typename unpacket_traits<Packet>::type* from) {
|
||||
static_assert((unpacket_traits<Packet>::size) % 4 == 0, "Packet size must be a multiple of 4");
|
||||
using QuarterPacket = QuarterPacket<Packet>;
|
||||
QuarterPacket a = load_vector_unaligned<QuarterPacket>(from);
|
||||
return __builtin_shufflevector(a, a, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3);
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
@@ -717,22 +643,6 @@ EIGEN_STRONG_INLINE Packet ploaddup8(const typename unpacket_traits<Packet>::typ
|
||||
return __builtin_shufflevector(a, a, 0, 0, 1, 1, 2, 2, 3, 3);
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet ploaddup16(const typename unpacket_traits<Packet>::type* from) {
|
||||
static_assert((unpacket_traits<Packet>::size) % 2 == 0, "Packet size must be a multiple of 2");
|
||||
using HalfPacket = HalfPacket<Packet>;
|
||||
HalfPacket a = load_vector_unaligned<HalfPacket>(from);
|
||||
return __builtin_shufflevector(a, a, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7);
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet ploadquad4(const typename unpacket_traits<Packet>::type* from) {
|
||||
static_assert((unpacket_traits<Packet>::size) % 4 == 0, "Packet size must be a multiple of 4");
|
||||
using QuarterPacket = QuarterPacket<Packet>;
|
||||
QuarterPacket a = load_vector_unaligned<QuarterPacket>(from);
|
||||
return __builtin_shufflevector(a, a, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet ploadquad8(const typename unpacket_traits<Packet>::type* from) {
|
||||
static_assert((unpacket_traits<Packet>::size) % 4 == 0, "Packet size must be a multiple of 4");
|
||||
@@ -741,241 +651,84 @@ EIGEN_STRONG_INLINE Packet ploadquad8(const typename unpacket_traits<Packet>::ty
|
||||
return __builtin_shufflevector(a, a, 0, 0, 0, 0, 1, 1, 1, 1);
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet ploadquad16(const typename unpacket_traits<Packet>::type* from) {
|
||||
static_assert((unpacket_traits<Packet>::size) % 4 == 0, "Packet size must be a multiple of 4");
|
||||
using QuarterPacket = QuarterPacket<Packet>;
|
||||
QuarterPacket a = load_vector_unaligned<QuarterPacket>(from);
|
||||
return __builtin_shufflevector(a, a, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3);
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf ploaddup<PacketXf>(const float* from) {
|
||||
return detail::ploaddup4<PacketXf>(from);
|
||||
EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) {
|
||||
return detail::ploaddup16<Packet16f>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd ploaddup<PacketXd>(const double* from) {
|
||||
return detail::ploaddup2<PacketXd>(from);
|
||||
EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {
|
||||
return detail::ploaddup8<Packet8d>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi ploaddup<PacketXi>(const int32_t* from) {
|
||||
return detail::ploaddup4<PacketXi>(from);
|
||||
EIGEN_STRONG_INLINE Packet16i ploaddup<Packet16i>(const int32_t* from) {
|
||||
return detail::ploaddup16<Packet16i>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl ploaddup<PacketXl>(const int64_t* from) {
|
||||
return detail::ploaddup2<PacketXl>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from) {
|
||||
return detail::ploadquad4<PacketXf>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const int32_t* from) {
|
||||
return detail::ploadquad4<PacketXi>(from);
|
||||
}
|
||||
// No ploadquad for 2-element packets (PacketXd, PacketXl) at 16 bytes.
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf ploaddup<PacketXf>(const float* from) {
|
||||
return detail::ploaddup8<PacketXf>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd ploaddup<PacketXd>(const double* from) {
|
||||
return detail::ploaddup4<PacketXd>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi ploaddup<PacketXi>(const int32_t* from) {
|
||||
return detail::ploaddup8<PacketXi>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl ploaddup<PacketXl>(const int64_t* from) {
|
||||
return detail::ploaddup4<PacketXl>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from) {
|
||||
return detail::ploadquad8<PacketXf>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd ploadquad<PacketXd>(const double* from) {
|
||||
return detail::ploadquad4<PacketXd>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const int32_t* from) {
|
||||
return detail::ploadquad8<PacketXi>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl ploadquad<PacketXl>(const int64_t* from) {
|
||||
return detail::ploadquad4<PacketXl>(from);
|
||||
EIGEN_STRONG_INLINE Packet8l ploaddup<Packet8l>(const int64_t* from) {
|
||||
return detail::ploaddup8<Packet8l>(from);
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf ploaddup<PacketXf>(const float* from) {
|
||||
return detail::ploaddup16<PacketXf>(from);
|
||||
EIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) {
|
||||
return detail::ploadquad16<Packet16f>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd ploaddup<PacketXd>(const double* from) {
|
||||
return detail::ploaddup8<PacketXd>(from);
|
||||
EIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) {
|
||||
return detail::ploadquad8<Packet8d>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi ploaddup<PacketXi>(const int32_t* from) {
|
||||
return detail::ploaddup16<PacketXi>(from);
|
||||
EIGEN_STRONG_INLINE Packet16i ploadquad<Packet16i>(const int32_t* from) {
|
||||
return detail::ploadquad16<Packet16i>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl ploaddup<PacketXl>(const int64_t* from) {
|
||||
return detail::ploaddup8<PacketXl>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from) {
|
||||
return detail::ploadquad16<PacketXf>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd ploadquad<PacketXd>(const double* from) {
|
||||
return detail::ploadquad8<PacketXd>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const int32_t* from) {
|
||||
return detail::ploadquad16<PacketXi>(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl ploadquad<PacketXl>(const int64_t* from) {
|
||||
return detail::ploadquad8<PacketXl>(from);
|
||||
EIGEN_STRONG_INLINE Packet8l ploadquad<Packet8l>(const int64_t* from) {
|
||||
return detail::ploadquad8<Packet8l>(from);
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// --- plset ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf plset<PacketXf>(const float& a) {
|
||||
return PacketXf{a + 0.0f, a + 1.0f, a + 2.0f, a + 3.0f};
|
||||
EIGEN_STRONG_INLINE Packet16f plset<Packet16f>(const float& a) {
|
||||
Packet16f x{a + 0.0f, a + 1.0f, a + 2.0f, a + 3.0f, a + 4.0f, a + 5.0f, a + 6.0f, a + 7.0f,
|
||||
a + 8.0f, a + 9.0f, a + 10.0f, a + 11.0f, a + 12.0f, a + 13.0f, a + 14.0f, a + 15.0f};
|
||||
return x;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd plset<PacketXd>(const double& a) {
|
||||
return PacketXd{a + 0.0, a + 1.0};
|
||||
EIGEN_STRONG_INLINE Packet8d plset<Packet8d>(const double& a) {
|
||||
return Packet8d{a + 0.0, a + 1.0, a + 2.0, a + 3.0, a + 4.0, a + 5.0, a + 6.0, a + 7.0};
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi plset<PacketXi>(const int32_t& a) {
|
||||
return PacketXi{a + 0, a + 1, a + 2, a + 3};
|
||||
EIGEN_STRONG_INLINE Packet16i plset<Packet16i>(const int32_t& a) {
|
||||
return Packet16i{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7,
|
||||
a + 8, a + 9, a + 10, a + 11, a + 12, a + 13, a + 14, a + 15};
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl plset<PacketXl>(const int64_t& a) {
|
||||
return PacketXl{a + 0, a + 1};
|
||||
EIGEN_STRONG_INLINE Packet8l plset<Packet8l>(const int64_t& a) {
|
||||
return Packet8l{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7};
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf plset<PacketXf>(const float& a) {
|
||||
return PacketXf{a + 0.0f, a + 1.0f, a + 2.0f, a + 3.0f, a + 4.0f, a + 5.0f, a + 6.0f, a + 7.0f};
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd plset<PacketXd>(const double& a) {
|
||||
return PacketXd{a + 0.0, a + 1.0, a + 2.0, a + 3.0};
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi plset<PacketXi>(const int32_t& a) {
|
||||
return PacketXi{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7};
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl plset<PacketXl>(const int64_t& a) {
|
||||
return PacketXl{a + 0, a + 1, a + 2, a + 3};
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf plset<PacketXf>(const float& a) {
|
||||
return PacketXf{a + 0.0f, a + 1.0f, a + 2.0f, a + 3.0f, a + 4.0f, a + 5.0f, a + 6.0f, a + 7.0f,
|
||||
a + 8.0f, a + 9.0f, a + 10.0f, a + 11.0f, a + 12.0f, a + 13.0f, a + 14.0f, a + 15.0f};
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd plset<PacketXd>(const double& a) {
|
||||
return PacketXd{a + 0.0, a + 1.0, a + 2.0, a + 3.0, a + 4.0, a + 5.0, a + 6.0, a + 7.0};
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi plset<PacketXi>(const int32_t& a) {
|
||||
return PacketXi{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7,
|
||||
a + 8, a + 9, a + 10, a + 11, a + 12, a + 13, a + 14, a + 15};
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl plset<PacketXl>(const int64_t& a) {
|
||||
return PacketXl{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7};
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// --- peven_mask ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf peven_mask(const PacketXf& /* unused */) {
|
||||
EIGEN_STRONG_INLINE Packet16f peven_mask(const Packet16f& /* unused */) {
|
||||
float kTrue = numext::bit_cast<float>(int32_t(-1));
|
||||
float kFalse = 0.0f;
|
||||
return PacketXf{kTrue, kFalse, kTrue, kFalse};
|
||||
return Packet16f{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse,
|
||||
kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse};
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd peven_mask(const PacketXd& /* unused */) {
|
||||
EIGEN_STRONG_INLINE Packet8d peven_mask(const Packet8d& /* unused */) {
|
||||
double kTrue = numext::bit_cast<double>(int64_t(-1l));
|
||||
double kFalse = 0.0;
|
||||
return PacketXd{kTrue, kFalse};
|
||||
return Packet8d{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse};
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf peven_mask(const PacketXf& /* unused */) {
|
||||
float kTrue = numext::bit_cast<float>(int32_t(-1));
|
||||
float kFalse = 0.0f;
|
||||
return PacketXf{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse};
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd peven_mask(const PacketXd& /* unused */) {
|
||||
double kTrue = numext::bit_cast<double>(int64_t(-1l));
|
||||
double kFalse = 0.0;
|
||||
return PacketXd{kTrue, kFalse, kTrue, kFalse};
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf peven_mask(const PacketXf& /* unused */) {
|
||||
float kTrue = numext::bit_cast<float>(int32_t(-1));
|
||||
float kFalse = 0.0f;
|
||||
return PacketXf{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse,
|
||||
kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse};
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd peven_mask(const PacketXd& /* unused */) {
|
||||
double kTrue = numext::bit_cast<double>(int64_t(-1l));
|
||||
double kFalse = 0.0;
|
||||
return PacketXd{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse};
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// Helpers for ptranspose.
|
||||
namespace detail {
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place2(Packet& p1, Packet& p2) {
|
||||
Packet tmp = __builtin_shufflevector(p1, p2, 0, 2);
|
||||
p2 = __builtin_shufflevector(p1, p2, 1, 3);
|
||||
p1 = tmp;
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place4(Packet& p1, Packet& p2) {
|
||||
Packet tmp = __builtin_shufflevector(p1, p2, 0, 4, 1, 5);
|
||||
p2 = __builtin_shufflevector(p1, p2, 2, 6, 3, 7);
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place16(Packet& p1, Packet& p2) {
|
||||
Packet tmp = __builtin_shufflevector(p1, p2, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
|
||||
p2 = __builtin_shufflevector(p1, p2, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
|
||||
p1 = tmp;
|
||||
}
|
||||
|
||||
@@ -986,68 +739,28 @@ EIGEN_ALWAYS_INLINE void zip_in_place8(Packet& p1, Packet& p2) {
|
||||
p1 = tmp;
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place16(Packet& p1, Packet& p2) {
|
||||
Packet tmp = __builtin_shufflevector(p1, p2, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
|
||||
p2 = __builtin_shufflevector(p1, p2, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
|
||||
p1 = tmp;
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
void zip_in_place(Packet& p1, Packet& p2);
|
||||
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXf>(PacketXf& p1, PacketXf& p2) {
|
||||
zip_in_place4(p1, p2);
|
||||
}
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXd>(PacketXd& p1, PacketXd& p2) {
|
||||
zip_in_place2(p1, p2);
|
||||
}
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXi>(PacketXi& p1, PacketXi& p2) {
|
||||
zip_in_place4(p1, p2);
|
||||
}
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXl>(PacketXl& p1, PacketXl& p2) {
|
||||
zip_in_place2(p1, p2);
|
||||
}
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXf>(PacketXf& p1, PacketXf& p2) {
|
||||
zip_in_place8(p1, p2);
|
||||
}
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXd>(PacketXd& p1, PacketXd& p2) {
|
||||
zip_in_place4(p1, p2);
|
||||
}
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXi>(PacketXi& p1, PacketXi& p2) {
|
||||
zip_in_place8(p1, p2);
|
||||
}
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXl>(PacketXl& p1, PacketXl& p2) {
|
||||
zip_in_place4(p1, p2);
|
||||
}
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXf>(PacketXf& p1, PacketXf& p2) {
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<Packet16f>(Packet16f& p1, Packet16f& p2) {
|
||||
zip_in_place16(p1, p2);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXd>(PacketXd& p1, PacketXd& p2) {
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<Packet8d>(Packet8d& p1, Packet8d& p2) {
|
||||
zip_in_place8(p1, p2);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXi>(PacketXi& p1, PacketXi& p2) {
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<Packet16i>(Packet16i& p1, Packet16i& p2) {
|
||||
zip_in_place16(p1, p2);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<PacketXl>(PacketXl& p1, PacketXl& p2) {
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<Packet8l>(Packet8l& p1, Packet8l& p2) {
|
||||
zip_in_place8(p1, p2);
|
||||
}
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock<Packet, 2>& kernel) {
|
||||
@@ -1099,68 +812,61 @@ EIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock<Packet, 16>& kernel) {
|
||||
|
||||
} // namespace detail
|
||||
|
||||
// ptranspose overloads: only emit valid block sizes per vector size.
|
||||
// At 16 bytes: float has 4 elems, double has 2 elems.
|
||||
// At 32 bytes: float has 8 elems, double has 4 elems.
|
||||
// At 64 bytes: float has 16 elems, double has 8 elems.
|
||||
|
||||
// All sizes support PacketBlock<PacketXf, 2> and PacketBlock<PacketXf, 4>.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXf, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXf, 2>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16f, 16>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
// All sizes support PacketBlock<PacketXd, 2>.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXd, 2>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16f, 8>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
// All sizes support PacketBlock<PacketXi, 2> and PacketBlock<PacketXi, 4>.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXi, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXi, 2>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16f, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
// All sizes support PacketBlock<PacketXl, 2>.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXl, 2>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16f, 2>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32
|
||||
// 32+ bytes: float has 8+ elems, double has 4+ elems.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXf, 8>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8d, 8>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXd, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXi, 8>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXl, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64
|
||||
// 64 bytes: float has 16 elems, double has 8 elems.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXf, 16>& kernel) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8d, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXd, 8>& kernel) {
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8d, 2>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXi, 16>& kernel) {
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16i, 16>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<PacketXl, 8>& kernel) {
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16i, 8>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16i, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16i, 2>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8l, 8>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8l, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8l, 2>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -33,10 +33,10 @@ namespace internal {
|
||||
return __builtin_reduce_or(a != 0) != 0; \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_REDUX_MINMAX(PacketXf)
|
||||
EIGEN_CLANG_PACKET_REDUX_MINMAX(PacketXd)
|
||||
EIGEN_CLANG_PACKET_REDUX_MINMAX(PacketXi)
|
||||
EIGEN_CLANG_PACKET_REDUX_MINMAX(PacketXl)
|
||||
EIGEN_CLANG_PACKET_REDUX_MINMAX(Packet16f)
|
||||
EIGEN_CLANG_PACKET_REDUX_MINMAX(Packet8d)
|
||||
EIGEN_CLANG_PACKET_REDUX_MINMAX(Packet16i)
|
||||
EIGEN_CLANG_PACKET_REDUX_MINMAX(Packet8l)
|
||||
#undef EIGEN_CLANG_PACKET_REDUX_MINMAX
|
||||
#endif
|
||||
|
||||
@@ -52,38 +52,13 @@ EIGEN_CLANG_PACKET_REDUX_MINMAX(PacketXl)
|
||||
}
|
||||
|
||||
// __builtin_reduce_{mul,add} are only defined for integer types.
|
||||
EIGEN_CLANG_PACKET_REDUX_INT(PacketXi)
|
||||
EIGEN_CLANG_PACKET_REDUX_INT(PacketXl)
|
||||
EIGEN_CLANG_PACKET_REDUX_INT(Packet16i)
|
||||
EIGEN_CLANG_PACKET_REDUX_INT(Packet8l)
|
||||
#undef EIGEN_CLANG_PACKET_REDUX_INT
|
||||
#endif
|
||||
|
||||
#if EIGEN_HAS_BUILTIN(__builtin_shufflevector)
|
||||
namespace detail {
|
||||
|
||||
// Reduction helpers for different vector sizes.
|
||||
// Each returns a pair of (even-sum, odd-sum) or (even-product, odd-product).
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceAdd2(
|
||||
const VectorT& a) {
|
||||
return {a[0], a[1]};
|
||||
}
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceAdd4(
|
||||
const VectorT& a) {
|
||||
const auto t1 = __builtin_shufflevector(a, a, 0, 1) + __builtin_shufflevector(a, a, 2, 3);
|
||||
return {t1[0], t1[1]};
|
||||
}
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceAdd8(
|
||||
const VectorT& a) {
|
||||
const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) + __builtin_shufflevector(a, a, 4, 5, 6, 7);
|
||||
const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) + __builtin_shufflevector(t1, t1, 2, 3);
|
||||
return {t2[0], t2[1]};
|
||||
}
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceAdd16(
|
||||
const VectorT& a) {
|
||||
@@ -95,23 +70,10 @@ EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_v
|
||||
}
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceMul2(
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceAdd8(
|
||||
const VectorT& a) {
|
||||
return {a[0], a[1]};
|
||||
}
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceMul4(
|
||||
const VectorT& a) {
|
||||
const auto t1 = __builtin_shufflevector(a, a, 0, 1) * __builtin_shufflevector(a, a, 2, 3);
|
||||
return {t1[0], t1[1]};
|
||||
}
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceMul8(
|
||||
const VectorT& a) {
|
||||
const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) * __builtin_shufflevector(a, a, 4, 5, 6, 7);
|
||||
const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) * __builtin_shufflevector(t1, t1, 2, 3);
|
||||
const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) + __builtin_shufflevector(a, a, 4, 5, 6, 7);
|
||||
const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) + __builtin_shufflevector(t1, t1, 2, 3);
|
||||
return {t2[0], t2[1]};
|
||||
}
|
||||
|
||||
@@ -124,188 +86,57 @@ EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_v
|
||||
const auto t3 = __builtin_shufflevector(t2, t2, 0, 1) * __builtin_shufflevector(t2, t2, 2, 3);
|
||||
return {t3[0], t3[1]};
|
||||
}
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceMul8(
|
||||
const VectorT& a) {
|
||||
const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) * __builtin_shufflevector(a, a, 4, 5, 6, 7);
|
||||
const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) * __builtin_shufflevector(t1, t1, 2, 3);
|
||||
return {t2[0], t2[1]};
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
// --- predux and predux_mul for float ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a) {
|
||||
float even, odd;
|
||||
std::tie(even, odd) = detail::ReduceAdd4(a);
|
||||
return even + odd;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE float predux_mul<PacketXf>(const PacketXf& a) {
|
||||
float even, odd;
|
||||
std::tie(even, odd) = detail::ReduceMul4(a);
|
||||
return even * odd;
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a) {
|
||||
float even, odd;
|
||||
std::tie(even, odd) = detail::ReduceAdd8(a);
|
||||
return even + odd;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE float predux_mul<PacketXf>(const PacketXf& a) {
|
||||
float even, odd;
|
||||
std::tie(even, odd) = detail::ReduceMul8(a);
|
||||
return even * odd;
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a) {
|
||||
EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
|
||||
float even, odd;
|
||||
std::tie(even, odd) = detail::ReduceAdd16(a);
|
||||
return even + odd;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE float predux_mul<PacketXf>(const PacketXf& a) {
|
||||
float even, odd;
|
||||
std::tie(even, odd) = detail::ReduceMul16(a);
|
||||
return even * odd;
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// --- predux and predux_mul for double ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE double predux<PacketXd>(const PacketXd& a) {
|
||||
double even, odd;
|
||||
std::tie(even, odd) = detail::ReduceAdd2(a);
|
||||
return even + odd;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE double predux_mul<PacketXd>(const PacketXd& a) {
|
||||
double even, odd;
|
||||
std::tie(even, odd) = detail::ReduceMul2(a);
|
||||
return even * odd;
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE double predux<PacketXd>(const PacketXd& a) {
|
||||
double even, odd;
|
||||
std::tie(even, odd) = detail::ReduceAdd4(a);
|
||||
return even + odd;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE double predux_mul<PacketXd>(const PacketXd& a) {
|
||||
double even, odd;
|
||||
std::tie(even, odd) = detail::ReduceMul4(a);
|
||||
return even * odd;
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE double predux<PacketXd>(const PacketXd& a) {
|
||||
EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
|
||||
double even, odd;
|
||||
std::tie(even, odd) = detail::ReduceAdd8(a);
|
||||
return even + odd;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE double predux_mul<PacketXd>(const PacketXd& a) {
|
||||
EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
|
||||
float even, odd;
|
||||
std::tie(even, odd) = detail::ReduceMul16(a);
|
||||
return even * odd;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {
|
||||
double even, odd;
|
||||
std::tie(even, odd) = detail::ReduceMul8(a);
|
||||
return even * odd;
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// --- predux for complex<float> ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux<PacketXcf>(const PacketXcf& a) {
|
||||
float re, im;
|
||||
std::tie(re, im) = detail::ReduceAdd4(a.v);
|
||||
return std::complex<float>(re, im);
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux<PacketXcf>(const PacketXcf& a) {
|
||||
float re, im;
|
||||
std::tie(re, im) = detail::ReduceAdd8(a.v);
|
||||
return std::complex<float>(re, im);
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux<PacketXcf>(const PacketXcf& a) {
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux<Packet8cf>(const Packet8cf& a) {
|
||||
float re, im;
|
||||
std::tie(re, im) = detail::ReduceAdd16(a.v);
|
||||
return std::complex<float>(re, im);
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// --- predux for complex<double> ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux<PacketXcd>(const PacketXcd& a) {
|
||||
// 1 complex double: just return it
|
||||
return a[0];
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux<PacketXcd>(const PacketXcd& a) {
|
||||
double re, im;
|
||||
std::tie(re, im) = detail::ReduceAdd4(a.v);
|
||||
return std::complex<double>(re, im);
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux<PacketXcd>(const PacketXcd& a) {
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux<Packet4cd>(const Packet4cd& a) {
|
||||
double re, im;
|
||||
std::tie(re, im) = detail::ReduceAdd8(a.v);
|
||||
return std::complex<double>(re, im);
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// --- predux_mul for complex<float> ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux_mul<PacketXcf>(const PacketXcf& a) {
|
||||
// 2 complex floats: just multiply them
|
||||
return a[0] * a[1];
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux_mul<PacketXcf>(const PacketXcf& a) {
|
||||
// 4 complex floats: split into 2+2, multiply, then scalar multiply
|
||||
const Packet2cf lower2 = Packet2cf(__builtin_shufflevector(a.v, a.v, 0, 1, 2, 3));
|
||||
const Packet2cf upper2 = Packet2cf(__builtin_shufflevector(a.v, a.v, 4, 5, 6, 7));
|
||||
const Packet2cf prod2 = pmul<Packet2cf>(lower2, upper2);
|
||||
return prod2[0] * prod2[1];
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux_mul<PacketXcf>(const PacketXcf& a) {
|
||||
// 8 complex floats: 8->4->2->scalar
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet8cf>(const Packet8cf& a) {
|
||||
const Packet4cf lower4 = Packet4cf(__builtin_shufflevector(a.v, a.v, 0, 1, 2, 3, 4, 5, 6, 7));
|
||||
const Packet4cf upper4 = Packet4cf(__builtin_shufflevector(a.v, a.v, 8, 9, 10, 11, 12, 13, 14, 15));
|
||||
const Packet4cf prod4 = pmul<Packet4cf>(lower4, upper4);
|
||||
@@ -315,38 +146,14 @@ EIGEN_STRONG_INLINE std::complex<float> predux_mul<PacketXcf>(const PacketXcf& a
|
||||
return prod2[0] * prod2[1];
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
// --- predux_mul for complex<double> ---
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux_mul<PacketXcd>(const PacketXcd& a) {
|
||||
// 1 complex double: just return it
|
||||
return a[0];
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux_mul<PacketXcd>(const PacketXcd& a) {
|
||||
// 2 complex doubles: just multiply them
|
||||
return a[0] * a[1];
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux_mul<PacketXcd>(const PacketXcd& a) {
|
||||
// 4 complex doubles: split into 2+2, multiply, then scalar multiply
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet4cd>(const Packet4cd& a) {
|
||||
const Packet2cd lower2 = Packet2cd(__builtin_shufflevector(a.v, a.v, 0, 1, 2, 3));
|
||||
const Packet2cd upper2 = Packet2cd(__builtin_shufflevector(a.v, a.v, 4, 5, 6, 7));
|
||||
const Packet2cd prod2 = pmul<Packet2cd>(lower2, upper2);
|
||||
return prod2[0] * prod2[1];
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -20,140 +20,56 @@ namespace internal {
|
||||
// preinterpret
|
||||
//==============================================================================
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf preinterpret<PacketXf, PacketXi>(const PacketXi& a) {
|
||||
return reinterpret_cast<PacketXf>(a);
|
||||
EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet16i>(const Packet16i& a) {
|
||||
return reinterpret_cast<Packet16f>(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi preinterpret<PacketXi, PacketXf>(const PacketXf& a) {
|
||||
return reinterpret_cast<PacketXi>(a);
|
||||
EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i, Packet16f>(const Packet16f& a) {
|
||||
return reinterpret_cast<Packet16i>(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd preinterpret<PacketXd, PacketXl>(const PacketXl& a) {
|
||||
return reinterpret_cast<PacketXd>(a);
|
||||
EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet8l>(const Packet8l& a) {
|
||||
return reinterpret_cast<Packet8d>(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl preinterpret<PacketXl, PacketXd>(const PacketXd& a) {
|
||||
return reinterpret_cast<PacketXl>(a);
|
||||
EIGEN_STRONG_INLINE Packet8l preinterpret<Packet8l, Packet8d>(const Packet8d& a) {
|
||||
return reinterpret_cast<Packet8l>(a);
|
||||
}
|
||||
|
||||
//==============================================================================
|
||||
// pcast
|
||||
//==============================================================================
|
||||
#if EIGEN_HAS_BUILTIN(__builtin_convertvector)
|
||||
// Float-to-int conversions: __builtin_convertvector has UB for NaN/inf/
|
||||
// out-of-range inputs. Replace NaN with 0 before converting so that
|
||||
// pldexp_fast (which may pass NaN exponents) doesn't trigger UB.
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi pcast<PacketXf, PacketXi>(const PacketXf& a) {
|
||||
const PacketXf safe = a == a ? a : PacketXf(0);
|
||||
return __builtin_convertvector(safe, PacketXi);
|
||||
EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {
|
||||
return __builtin_convertvector(a, Packet16i);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf pcast<PacketXi, PacketXf>(const PacketXi& a) {
|
||||
return __builtin_convertvector(a, PacketXf);
|
||||
EIGEN_STRONG_INLINE Packet16f pcast<Packet16i, Packet16f>(const Packet16i& a) {
|
||||
return __builtin_convertvector(a, Packet16f);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl pcast<PacketXd, PacketXl>(const PacketXd& a) {
|
||||
const PacketXd safe = a == a ? a : PacketXd(0);
|
||||
return __builtin_convertvector(safe, PacketXl);
|
||||
EIGEN_STRONG_INLINE Packet8l pcast<Packet8d, Packet8l>(const Packet8d& a) {
|
||||
return __builtin_convertvector(a, Packet8l);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd pcast<PacketXl, PacketXd>(const PacketXl& a) {
|
||||
return __builtin_convertvector(a, PacketXd);
|
||||
EIGEN_STRONG_INLINE Packet8d pcast<Packet8l, Packet8d>(const Packet8l& a) {
|
||||
return __builtin_convertvector(a, Packet8d);
|
||||
}
|
||||
|
||||
// float -> double: converts lower half of floats to doubles
|
||||
// double -> float: converts two PacketXd to one PacketXf
|
||||
// int32 -> int64: converts lower half of int32s to int64s
|
||||
// int64 -> int32: converts two PacketXl to one PacketXi
|
||||
|
||||
#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16
|
||||
|
||||
// float -> double: converts lower 2 floats to 2 doubles
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd pcast<PacketXf, PacketXd>(const PacketXf& a) {
|
||||
using HalfFloat = detail::VectorType<float, 2>;
|
||||
HalfFloat lo = __builtin_shufflevector(a, a, 0, 1);
|
||||
return __builtin_convertvector(lo, PacketXd);
|
||||
}
|
||||
|
||||
// double -> float: converts two PacketXd (2 doubles each) to one PacketXf (4 floats)
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf pcast<PacketXd, PacketXf>(const PacketXd& a, const PacketXd& b) {
|
||||
using HalfFloat = detail::VectorType<float, 2>;
|
||||
HalfFloat lo = __builtin_convertvector(a, HalfFloat);
|
||||
HalfFloat hi = __builtin_convertvector(b, HalfFloat);
|
||||
return __builtin_shufflevector(lo, hi, 0, 1, 2, 3);
|
||||
}
|
||||
|
||||
// int32 -> int64: converts lower 2 int32s to 2 int64s
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl pcast<PacketXi, PacketXl>(const PacketXi& a) {
|
||||
using HalfInt = detail::VectorType<int32_t, 2>;
|
||||
HalfInt lo = __builtin_shufflevector(a, a, 0, 1);
|
||||
return __builtin_convertvector(lo, PacketXl);
|
||||
}
|
||||
|
||||
// int64 -> int32: converts two PacketXl (2 int64s each) to one PacketXi (4 int32s)
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi pcast<PacketXl, PacketXi>(const PacketXl& a, const PacketXl& b) {
|
||||
using HalfInt = detail::VectorType<int32_t, 2>;
|
||||
HalfInt lo = __builtin_convertvector(a, HalfInt);
|
||||
HalfInt hi = __builtin_convertvector(b, HalfInt);
|
||||
return __builtin_shufflevector(lo, hi, 0, 1, 2, 3);
|
||||
}
|
||||
|
||||
#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32
|
||||
|
||||
// float -> double: converts lower 4 floats to 4 doubles
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd pcast<PacketXf, PacketXd>(const PacketXf& a) {
|
||||
using HalfFloat = detail::VectorType<float, 4>;
|
||||
HalfFloat lo = __builtin_shufflevector(a, a, 0, 1, 2, 3);
|
||||
return __builtin_convertvector(lo, PacketXd);
|
||||
}
|
||||
|
||||
// double -> float: converts two PacketXd (4 doubles each) to one PacketXf (8 floats)
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf pcast<PacketXd, PacketXf>(const PacketXd& a, const PacketXd& b) {
|
||||
using HalfFloat = detail::VectorType<float, 4>;
|
||||
HalfFloat lo = __builtin_convertvector(a, HalfFloat);
|
||||
HalfFloat hi = __builtin_convertvector(b, HalfFloat);
|
||||
return __builtin_shufflevector(lo, hi, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
}
|
||||
|
||||
// int32 -> int64: converts lower 4 int32s to 4 int64s
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl pcast<PacketXi, PacketXl>(const PacketXi& a) {
|
||||
using HalfInt = detail::VectorType<int32_t, 4>;
|
||||
HalfInt lo = __builtin_shufflevector(a, a, 0, 1, 2, 3);
|
||||
return __builtin_convertvector(lo, PacketXl);
|
||||
}
|
||||
|
||||
// int64 -> int32: converts two PacketXl (4 int64s each) to one PacketXi (8 int32s)
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi pcast<PacketXl, PacketXi>(const PacketXl& a, const PacketXl& b) {
|
||||
using HalfInt = detail::VectorType<int32_t, 4>;
|
||||
HalfInt lo = __builtin_convertvector(a, HalfInt);
|
||||
HalfInt hi = __builtin_convertvector(b, HalfInt);
|
||||
return __builtin_shufflevector(lo, hi, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
}
|
||||
|
||||
#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64
|
||||
|
||||
// float -> double: converts lower 8 floats to 8 doubles
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd pcast<PacketXf, PacketXd>(const PacketXf& a) {
|
||||
EIGEN_STRONG_INLINE Packet8d pcast<Packet16f, Packet8d>(const Packet16f& a) {
|
||||
using HalfFloat = detail::VectorType<float, 8>;
|
||||
HalfFloat lo = __builtin_shufflevector(a, a, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
return __builtin_convertvector(lo, PacketXd);
|
||||
return __builtin_convertvector(lo, Packet8d);
|
||||
}
|
||||
|
||||
// double -> float: converts two PacketXd to one PacketXf
|
||||
// double -> float: converts two Packet8d to one Packet16f
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf pcast<PacketXd, PacketXf>(const PacketXd& a, const PacketXd& b) {
|
||||
EIGEN_STRONG_INLINE Packet16f pcast<Packet8d, Packet16f>(const Packet8d& a, const Packet8d& b) {
|
||||
using HalfFloat = detail::VectorType<float, 8>;
|
||||
HalfFloat lo = __builtin_convertvector(a, HalfFloat);
|
||||
HalfFloat hi = __builtin_convertvector(b, HalfFloat);
|
||||
@@ -162,22 +78,20 @@ EIGEN_STRONG_INLINE PacketXf pcast<PacketXd, PacketXf>(const PacketXd& a, const
|
||||
|
||||
// int32 -> int64: converts lower 8 int32s to 8 int64s
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl pcast<PacketXi, PacketXl>(const PacketXi& a) {
|
||||
EIGEN_STRONG_INLINE Packet8l pcast<Packet16i, Packet8l>(const Packet16i& a) {
|
||||
using HalfInt = detail::VectorType<int32_t, 8>;
|
||||
HalfInt lo = __builtin_shufflevector(a, a, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
return __builtin_convertvector(lo, PacketXl);
|
||||
return __builtin_convertvector(lo, Packet8l);
|
||||
}
|
||||
|
||||
// int64 -> int32: converts two PacketXl to one PacketXi
|
||||
// int64 -> int32: converts two Packet8l to one Packet16i
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi pcast<PacketXl, PacketXi>(const PacketXl& a, const PacketXl& b) {
|
||||
EIGEN_STRONG_INLINE Packet16i pcast<Packet8l, Packet16i>(const Packet8l& a, const Packet8l& b) {
|
||||
using HalfInt = detail::VectorType<int32_t, 8>;
|
||||
HalfInt lo = __builtin_convertvector(a, HalfInt);
|
||||
HalfInt hi = __builtin_convertvector(b, HalfInt);
|
||||
return __builtin_shufflevector(lo, hi, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -179,7 +179,6 @@ ei_add_test(numext)
|
||||
ei_add_test(sizeof)
|
||||
ei_add_test(dynalloc)
|
||||
ei_add_test(nomalloc)
|
||||
ei_add_test(noresize)
|
||||
ei_add_test(first_aligned)
|
||||
ei_add_test(type_alias)
|
||||
ei_add_test(nullary)
|
||||
@@ -187,17 +186,6 @@ ei_add_test(mixingtypes)
|
||||
ei_add_test(float_conversion)
|
||||
ei_add_test(io)
|
||||
ei_add_test(packetmath "-DEIGEN_FAST_MATH=1")
|
||||
# Generic clang vector backend tests for different vector sizes.
|
||||
include(CheckCXXSourceCompiles)
|
||||
check_cxx_source_compiles("
|
||||
typedef float v4sf __attribute__((ext_vector_type(4)));
|
||||
int main() { return __builtin_vectorelements(v4sf{}); }
|
||||
" COMPILER_SUPPORTS_VECTOR_EXTENSIONS)
|
||||
if(COMPILER_SUPPORTS_VECTOR_EXTENSIONS)
|
||||
ei_add_test(packetmath_generic_16 "-DEIGEN_FAST_MATH=1")
|
||||
ei_add_test(packetmath_generic_32 "-DEIGEN_FAST_MATH=1")
|
||||
ei_add_test(packetmath_generic_64 "-DEIGEN_FAST_MATH=1")
|
||||
endif()
|
||||
ei_add_test(packet_segment)
|
||||
ei_add_test(vectorization_logic)
|
||||
ei_add_test(basicstuff)
|
||||
|
||||
@@ -33,8 +33,6 @@ EIGEN_STRONG_INLINE DstXprType& copy_using_evaluator(const PlainObjectBase<DstXp
|
||||
eigen_assert((dst.size() == 0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
|
||||
: (dst.rows() == src.rows() && dst.cols() == src.cols()))) &&
|
||||
"Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
|
||||
// Allow resizing of default-constructed (empty) destinations.
|
||||
if (dst.size() == 0) dst.const_cast_derived().resizeLike(src.derived());
|
||||
#else
|
||||
dst.const_cast_derived().resizeLike(src.derived());
|
||||
#endif
|
||||
|
||||
@@ -1,110 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
// Must be defined before including any Eigen headers.
|
||||
#define EIGEN_NO_AUTOMATIC_RESIZING
|
||||
|
||||
#include "main.h"
|
||||
|
||||
// Helper to create a random matrix respecting compile-time fixed dimensions.
|
||||
template <typename MatrixType>
|
||||
MatrixType random_matrix() {
|
||||
enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime };
|
||||
Index rows = (RowsAtCompileTime == Dynamic) ? internal::random<Index>(1, 10) : Index(RowsAtCompileTime);
|
||||
Index cols = (ColsAtCompileTime == Dynamic) ? internal::random<Index>(1, 10) : Index(ColsAtCompileTime);
|
||||
return MatrixType::Random(rows, cols);
|
||||
}
|
||||
|
||||
// Checks plain assignment into a default-constructed destination: after
// `dst = src`, dst must have the same rows, columns and (approximately) the
// same coefficients as src. The source comes from random_matrix<MatrixType>().
template <typename MatrixType>
|
||||
void noresize_assign_to_empty() {
|
||||
MatrixType src = random_matrix<MatrixType>();
|
||||
|
||||
// Assigning to a default-constructed (empty) destination should work.
|
||||
MatrixType dst;
|
||||
dst = src;
|
||||
VERIFY_IS_EQUAL(dst.rows(), src.rows());
|
||||
VERIFY_IS_EQUAL(dst.cols(), src.cols());
|
||||
VERIFY_IS_APPROX(dst, src);
|
||||
}
|
||||
|
||||
// Checks assignment of an expression (`a + b`) into a default-constructed
// destination: dst must end up with a's dimensions and approximately the
// coefficients of `a + b`.
template <typename MatrixType>
|
||||
void noresize_assign_expression_to_empty() {
|
||||
MatrixType a = random_matrix<MatrixType>();
|
||||
// b is sized explicitly to match a, then filled in place.
MatrixType b(a.rows(), a.cols());
|
||||
b.setRandom();
|
||||
|
||||
// Assigning an expression to an empty destination should work.
|
||||
MatrixType dst;
|
||||
dst = a + b;
|
||||
VERIFY_IS_EQUAL(dst.rows(), a.rows());
|
||||
VERIFY_IS_EQUAL(dst.cols(), a.cols());
|
||||
VERIFY_IS_APPROX(dst, a + b);
|
||||
}
|
||||
|
||||
// Checks that a destination constructed directly from an expression (`a * 2`)
// takes a's dimensions and approximately the coefficients of the expression.
template <typename MatrixType>
|
||||
void noresize_construct_from_expression() {
|
||||
MatrixType a = random_matrix<MatrixType>();
|
||||
|
||||
// Construction from an expression should work.
|
||||
MatrixType dst = a * 2;
|
||||
VERIFY_IS_EQUAL(dst.rows(), a.rows());
|
||||
VERIFY_IS_EQUAL(dst.cols(), a.cols());
|
||||
VERIFY_IS_APPROX(dst, a * 2);
|
||||
}
|
||||
|
||||
// Assigns src into a default-constructed destination and then compares the two
// column by column through col(j).
template <typename MatrixType>
|
||||
void noresize_col_access() {
|
||||
MatrixType src = random_matrix<MatrixType>();
|
||||
|
||||
// Assigning to empty, then accessing columns should work.
|
||||
MatrixType dst;
|
||||
dst = src;
|
||||
// The loop bound is src.cols(); each column of dst is checked against the
// matching column of src.
for (Index j = 0; j < src.cols(); ++j) {
|
||||
VERIFY_IS_APPROX(dst.col(j), src.col(j));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename MatrixType>
|
||||
void noresize_size_mismatch() {
|
||||
enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime };
|
||||
Index rows = (RowsAtCompileTime == Dynamic) ? internal::random<Index>(2, 10) : Index(RowsAtCompileTime);
|
||||
Index cols = (ColsAtCompileTime == Dynamic) ? internal::random<Index>(2, 10) : Index(ColsAtCompileTime);
|
||||
MatrixType src = MatrixType::Random(rows, cols);
|
||||
// Create a destination with at least one mismatched dynamic dimension.
|
||||
Index dst_rows = (RowsAtCompileTime == Dynamic) ? rows + 1 : rows;
|
||||
Index dst_cols = (ColsAtCompileTime == Dynamic) ? cols + 1 : cols;
|
||||
MatrixType dst = MatrixType::Random(dst_rows, dst_cols);
|
||||
|
||||
// Assigning to a non-empty destination with different size should assert.
|
||||
VERIFY_RAISES_ASSERT(dst = src);
|
||||
}
|
||||
|
||||
// Test entry point: runs each noresize_* check for a set of matrix, array and
// vector types, grouped into subtests 1-7.
EIGEN_DECLARE_TEST(noresize) {
|
||||
// Subtests 1-3: plain assignment into an empty destination
// (matrix types, array types, then column/row vector types).
CALL_SUBTEST_1(noresize_assign_to_empty<MatrixXf>());
|
||||
CALL_SUBTEST_1(noresize_assign_to_empty<MatrixXd>());
|
||||
CALL_SUBTEST_1(noresize_assign_to_empty<MatrixXcf>());
|
||||
CALL_SUBTEST_1(noresize_assign_to_empty<MatrixXcd>());
|
||||
CALL_SUBTEST_2(noresize_assign_to_empty<ArrayXXd>());
|
||||
CALL_SUBTEST_2(noresize_assign_to_empty<ArrayXXcd>());
|
||||
CALL_SUBTEST_3(noresize_assign_to_empty<VectorXf>());
|
||||
CALL_SUBTEST_3(noresize_assign_to_empty<RowVectorXd>());
|
||||
|
||||
// Subtest 4: assigning an expression into an empty destination.
CALL_SUBTEST_4(noresize_assign_expression_to_empty<MatrixXd>());
|
||||
CALL_SUBTEST_4(noresize_assign_expression_to_empty<ArrayXXd>());
|
||||
|
||||
// Subtest 5: constructing a destination from an expression.
CALL_SUBTEST_5(noresize_construct_from_expression<MatrixXd>());
|
||||
CALL_SUBTEST_5(noresize_construct_from_expression<ArrayXXd>());
|
||||
|
||||
// Subtest 6: column access after assigning into an empty destination.
CALL_SUBTEST_6(noresize_col_access<MatrixXd>());
|
||||
CALL_SUBTEST_6(noresize_col_access<MatrixXf>());
|
||||
|
||||
// Subtest 7: a size mismatch on a non-empty destination must assert.
CALL_SUBTEST_7(noresize_size_mismatch<MatrixXd>());
|
||||
CALL_SUBTEST_7(noresize_size_mismatch<MatrixXf>());
|
||||
CALL_SUBTEST_7(noresize_size_mismatch<VectorXd>());
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
// Force the generic clang vector backend with 16-byte vectors.
|
||||
#define EIGEN_VECTORIZE_GENERIC 1
|
||||
#define EIGEN_GENERIC_VECTOR_SIZE_BYTES 16
|
||||
#include "packetmath.cpp"
|
||||
@@ -1,4 +0,0 @@
|
||||
// Force the generic clang vector backend with 32-byte vectors.
|
||||
#define EIGEN_VECTORIZE_GENERIC 1
|
||||
#define EIGEN_GENERIC_VECTOR_SIZE_BYTES 32
|
||||
#include "packetmath.cpp"
|
||||
@@ -1,4 +0,0 @@
|
||||
// Force the generic clang vector backend with 64-byte vectors.
|
||||
#define EIGEN_VECTORIZE_GENERIC 1
|
||||
#define EIGEN_GENERIC_VECTOR_SIZE_BYTES 64
|
||||
#include "packetmath.cpp"
|
||||
Reference in New Issue
Block a user