From ea25ea52bb58bb16826ed9211ef7931cae99a728 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen <4643818-rmlarsen1@users.noreply.gitlab.com> Date: Wed, 25 Feb 2026 08:31:41 -0800 Subject: [PATCH] Revert accidental changes from !2212 squash merge libeigen/eigen!2214 Co-authored-by: Rasmus Munk Larsen --- Eigen/src/Core/AssignEvaluator.h | 4 +- Eigen/src/Core/PlainObjectBase.h | 3 +- Eigen/src/Core/arch/clang/Complex.h | 521 ++++-------------- Eigen/src/Core/arch/clang/MathFunctions.h | 12 +- Eigen/src/Core/arch/clang/PacketMath.h | 620 ++++++---------------- Eigen/src/Core/arch/clang/Reductions.h | 253 ++------- Eigen/src/Core/arch/clang/TypeCasting.h | 134 +---- test/CMakeLists.txt | 12 - test/evaluators.cpp | 2 - test/noresize.cpp | 110 ---- test/packetmath_generic_16.cpp | 4 - test/packetmath_generic_32.cpp | 4 - test/packetmath_generic_64.cpp | 4 - 13 files changed, 331 insertions(+), 1352 deletions(-) delete mode 100644 test/noresize.cpp delete mode 100644 test/packetmath_generic_16.cpp delete mode 100644 test/packetmath_generic_32.cpp delete mode 100644 test/packetmath_generic_64.cpp diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 4762d7169..3afdd3e62 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -810,12 +810,10 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprTyp (dst.size() == 0 || (DstXprType::IsVectorAtCompileTime ? (dst.size() == src.size()) : (dst.rows() == dstRows && dst.cols() == dstCols))) && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); - // Allow resizing of default-constructed (empty) destinations. - if (dst.size() == 0) dst.resize(dstRows, dstCols); #else dst.resize(dstRows, dstCols); -#endif eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols); +#endif } } diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 25e402c97..1544a5821 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -685,8 +685,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { eigen_assert((this->size() == 0 || (IsVectorAtCompileTime ? (this->size() == other.size()) : (rows() == other.rows() && cols() == other.cols()))) && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); - // Allow resizing of default-constructed (empty) destinations. - if (this->size() == 0) resizeLike(other); + EIGEN_ONLY_USED_FOR_DEBUG(other); #else resizeLike(other); #endif diff --git a/Eigen/src/Core/arch/clang/Complex.h b/Eigen/src/Core/arch/clang/Complex.h index 20ac53d6f..6b8e7768e 100644 --- a/Eigen/src/Core/arch/clang/Complex.h +++ b/Eigen/src/Core/arch/clang/Complex.h @@ -27,23 +27,11 @@ struct complex_packet_wrapper { RealPacketT v; }; -// --- Primary complex packet aliases --- -constexpr int kComplexFloatSize = kFloatPacketSize / 2; // 2, 4, or 8 -constexpr int kComplexDoubleSize = kDoublePacketSize / 2; // 1, 2, or 4 -using PacketXcf = complex_packet_wrapper; -using PacketXcd = complex_packet_wrapper; - -// Sub-packet types needed for reductions at larger sizes. -// When PacketXcf IS already a given size, we skip the alias to avoid duplicates. -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32 -using Packet2cf = complex_packet_wrapper; -#endif -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 +using Packet8cf = complex_packet_wrapper; using Packet4cf = complex_packet_wrapper; -#endif -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 +using Packet2cf = complex_packet_wrapper; +using Packet4cd = complex_packet_wrapper; using Packet2cd = complex_packet_wrapper; -#endif struct generic_complex_packet_traits : default_packet_traits { enum { @@ -70,39 +58,39 @@ struct generic_complex_packet_traits : default_packet_traits { template <> struct packet_traits> : generic_complex_packet_traits { - using type = PacketXcf; - using half = PacketXcf; + using type = Packet8cf; + using half = Packet8cf; enum { - size = kComplexFloatSize, + size = 8, }; }; template <> -struct unpacket_traits : generic_unpacket_traits { +struct unpacket_traits : generic_unpacket_traits { using type = std::complex; - using half = PacketXcf; - using as_real = PacketXf; + using half = Packet8cf; + using as_real = Packet16f; enum { - size = kComplexFloatSize, + size = 8, }; }; template <> struct packet_traits> : generic_complex_packet_traits { - using type = PacketXcd; - using half = PacketXcd; + using type = Packet4cd; + using half = Packet4cd; enum { - size = kComplexDoubleSize, + size = 4, }; }; template <> -struct unpacket_traits : generic_unpacket_traits { +struct unpacket_traits : generic_unpacket_traits { using type = std::complex; - using half = PacketXcd; - using as_real = PacketXd; + using half = Packet4cd; + using as_real = Packet8d; enum { - size = kComplexDoubleSize, + size = 4, }; }; @@ -127,58 +115,24 @@ struct unpacket_traits : generic_unpacket_traits { pstore(&numext::real_ref(*to), from.v); \ } -EIGEN_CLANG_COMPLEX_LOAD_STORE(PacketXcf); -EIGEN_CLANG_COMPLEX_LOAD_STORE(PacketXcd); +EIGEN_CLANG_COMPLEX_LOAD_STORE(Packet8cf); +EIGEN_CLANG_COMPLEX_LOAD_STORE(Packet4cd); #undef EIGEN_CLANG_COMPLEX_LOAD_STORE -// --- pset1 for complex --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE PacketXcf pset1(const std::complex& from) { +EIGEN_STRONG_INLINE Packet8cf pset1(const std::complex& from) { const float re = numext::real(from); const float im = numext::imag(from); - return PacketXcf(PacketXf{re, im, re, im}); + return Packet8cf(Packet16f{re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im}); } + template <> -EIGEN_STRONG_INLINE PacketXcd pset1(const std::complex& from) { +EIGEN_STRONG_INLINE Packet4cd pset1(const std::complex& from) { const double re = numext::real(from); const double im = numext::imag(from); - return PacketXcd(PacketXd{re, im}); + return Packet4cd(Packet8d{re, im, re, im, re, im, re, im}); } -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE PacketXcf pset1(const std::complex& from) { - const float re = numext::real(from); - const float im = numext::imag(from); - return PacketXcf(PacketXf{re, im, re, im, re, im, re, im}); -} -template <> -EIGEN_STRONG_INLINE PacketXcd pset1(const std::complex& from) { - const double re = numext::real(from); - const double im = numext::imag(from); - return PacketXcd(PacketXd{re, im, re, im}); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXcf pset1(const std::complex& from) { - const float re = numext::real(from); - const float im = numext::imag(from); - return PacketXcf(PacketXf{re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im}); -} -template <> -EIGEN_STRONG_INLINE PacketXcd pset1(const std::complex& from) { - const double re = numext::real(from); - const double im = numext::imag(from); - return PacketXcd(PacketXd{re, im, re, im, re, im, re, im}); -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - // ----------- Unary ops ------------------ #define DELEGATE_UNARY_TO_REAL_OP(PACKET_TYPE, OP) \ template <> \ @@ -195,348 +149,134 @@ EIGEN_STRONG_INLINE PacketXcd pset1(const std::complex& from) } \ EIGEN_INSTANTIATE_COMPLEX_MATH_FUNCS(PACKET_TYPE) -EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(PacketXcf); -EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(PacketXcd); - -// --- pconj --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 +EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(Packet8cf); +EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(Packet4cd); template <> -EIGEN_STRONG_INLINE PacketXcf pconj(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7)); +EIGEN_STRONG_INLINE Packet8cf pconj(const Packet8cf& a) { + return Packet8cf(__builtin_shufflevector(a.v, -a.v, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31)); } template <> -EIGEN_STRONG_INLINE PacketXcd pconj(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, -a.v, 0, 3)); -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE PacketXcf pconj(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15)); -} -template <> -EIGEN_STRONG_INLINE PacketXcd pconj(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7)); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXcf pconj(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, -a.v, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31)); -} -template <> -EIGEN_STRONG_INLINE PacketXcd pconj(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15)); -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// Sub-packet pconj specializations needed for reductions. -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32 -template <> -EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { - return Packet2cf(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7)); -} -#endif -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 -template <> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a) { return Packet4cf(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15)); } template <> +EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { + return Packet2cf(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7)); +} + +template <> +EIGEN_STRONG_INLINE Packet4cd pconj(const Packet4cd& a) { + return Packet4cd(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15)); +} +template <> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a) { return Packet2cd(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7)); } -#endif #undef DELEGATE_UNARY_TO_REAL_OP #undef EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS // Flip real and imaginary parts, i.e. {re(a), im(a)} -> {im(a), re(a)}. -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE PacketXcf pcplxflip(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2)); +EIGEN_STRONG_INLINE Packet8cf pcplxflip(const Packet8cf& a) { + return Packet8cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14)); } template <> -EIGEN_STRONG_INLINE PacketXcd pcplxflip(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 0)); -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE PacketXcf pcplxflip(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6)); -} -template <> -EIGEN_STRONG_INLINE PacketXcd pcplxflip(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2)); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXcf pcplxflip(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14)); -} -template <> -EIGEN_STRONG_INLINE PacketXcd pcplxflip(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6)); -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// Sub-packet pcplxflip specializations needed for reductions. -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32 -template <> -EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& a) { - return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2)); -} -#endif -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 -template <> EIGEN_STRONG_INLINE Packet4cf pcplxflip(const Packet4cf& a) { return Packet4cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6)); } template <> +EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& a) { + return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2)); +} +template <> +EIGEN_STRONG_INLINE Packet4cd pcplxflip(const Packet4cd& a) { + return Packet4cd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6)); +} +template <> EIGEN_STRONG_INLINE Packet2cd pcplxflip(const Packet2cd& a) { return Packet2cd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2)); } -#endif // Copy real to imaginary part, i.e. {re(a), im(a)} -> {re(a), re(a)}. -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE PacketXcf pdupreal(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2)); +EIGEN_STRONG_INLINE Packet8cf pdupreal(const Packet8cf& a) { + return Packet8cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14)); } template <> -EIGEN_STRONG_INLINE PacketXcd pdupreal(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, a.v, 0, 0)); -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE PacketXcf pdupreal(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6)); -} -template <> -EIGEN_STRONG_INLINE PacketXcd pdupreal(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2)); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXcf pdupreal(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14)); -} -template <> -EIGEN_STRONG_INLINE PacketXcd pdupreal(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6)); -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// Sub-packet pdupreal specializations needed for reductions. -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32 -template <> -EIGEN_STRONG_INLINE Packet2cf pdupreal(const Packet2cf& a) { - return Packet2cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2)); -} -#endif -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 -template <> EIGEN_STRONG_INLINE Packet4cf pdupreal(const Packet4cf& a) { return Packet4cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6)); } template <> +EIGEN_STRONG_INLINE Packet2cf pdupreal(const Packet2cf& a) { + return Packet2cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2)); +} +template <> +EIGEN_STRONG_INLINE Packet4cd pdupreal(const Packet4cd& a) { + return Packet4cd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6)); +} +template <> EIGEN_STRONG_INLINE Packet2cd pdupreal(const Packet2cd& a) { return Packet2cd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2)); } -#endif // Copy imaginary to real part, i.e. {re(a), im(a)} -> {im(a), im(a)}. -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE PacketXcf pdupimag(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3)); +EIGEN_STRONG_INLINE Packet8cf pdupimag(const Packet8cf& a) { + return Packet8cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15)); } template <> -EIGEN_STRONG_INLINE PacketXcd pdupimag(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 1)); -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE PacketXcf pdupimag(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7)); -} -template <> -EIGEN_STRONG_INLINE PacketXcd pdupimag(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3)); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXcf pdupimag(const PacketXcf& a) { - return PacketXcf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15)); -} -template <> -EIGEN_STRONG_INLINE PacketXcd pdupimag(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7)); -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// Sub-packet pdupimag specializations needed for reductions. -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32 -template <> -EIGEN_STRONG_INLINE Packet2cf pdupimag(const Packet2cf& a) { - return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3)); -} -#endif -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 -template <> EIGEN_STRONG_INLINE Packet4cf pdupimag(const Packet4cf& a) { return Packet4cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7)); } template <> +EIGEN_STRONG_INLINE Packet2cf pdupimag(const Packet2cf& a) { + return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3)); +} +template <> +EIGEN_STRONG_INLINE Packet4cd pdupimag(const Packet4cd& a) { + return Packet4cd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7)); +} +template <> EIGEN_STRONG_INLINE Packet2cd pdupimag(const Packet2cd& a) { return Packet2cd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3)); } -#endif - -// --- ploaddup --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 template <> -EIGEN_STRONG_INLINE PacketXcf ploaddup(const std::complex* from) { - return pset1(*from); +EIGEN_STRONG_INLINE Packet8cf ploaddup(const std::complex* from) { + return Packet8cf(Packet16f{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]), + std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]), + std::real(from[2]), std::imag(from[2]), std::real(from[2]), std::imag(from[2]), + std::real(from[3]), std::imag(from[3]), std::real(from[3]), std::imag(from[3])}); } template <> -EIGEN_STRONG_INLINE PacketXcd ploaddup(const std::complex* from) { - return pset1(*from); -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE PacketXcf ploaddup(const std::complex* from) { - return PacketXcf(PacketXf{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]), - std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])}); -} -template <> -EIGEN_STRONG_INLINE PacketXcd ploaddup(const std::complex* from) { - return pset1(*from); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXcf ploaddup(const std::complex* from) { - return PacketXcf(PacketXf{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]), - std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]), - std::real(from[2]), std::imag(from[2]), std::real(from[2]), std::imag(from[2]), - std::real(from[3]), std::imag(from[3]), std::real(from[3]), std::imag(from[3])}); -} -template <> -EIGEN_STRONG_INLINE PacketXcd ploaddup(const std::complex* from) { - return PacketXcd(PacketXd{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]), +EIGEN_STRONG_INLINE Packet4cd ploaddup(const std::complex* from) { + return Packet4cd(Packet8d{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]), std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])}); } -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// --- ploadquad --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE PacketXcf ploadquad(const std::complex* from) { - return pset1(*from); +EIGEN_STRONG_INLINE Packet8cf ploadquad(const std::complex* from) { + return Packet8cf(Packet16f{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]), + std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]), + std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]), + std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])}); } template <> -EIGEN_STRONG_INLINE PacketXcd ploadquad(const std::complex* from) { - return pset1(*from); +EIGEN_STRONG_INLINE Packet4cd ploadquad(const std::complex* from) { + return pset1(*from); } -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - template <> -EIGEN_STRONG_INLINE PacketXcf ploadquad(const std::complex* from) { - return pset1(*from); +EIGEN_STRONG_INLINE Packet8cf preverse(const Packet8cf& a) { + return Packet8cf(reinterpret_cast(preverse(reinterpret_cast(a.v)))); } template <> -EIGEN_STRONG_INLINE PacketXcd ploadquad(const std::complex* from) { - return pset1(*from); +EIGEN_STRONG_INLINE Packet4cd preverse(const Packet4cd& a) { + return Packet4cd(__builtin_shufflevector(a.v, a.v, 6, 7, 4, 5, 2, 3, 0, 1)); } -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXcf ploadquad(const std::complex* from) { - return PacketXcf(PacketXf{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]), - std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]), - std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]), - std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])}); -} -template <> -EIGEN_STRONG_INLINE PacketXcd ploadquad(const std::complex* from) { - return pset1(*from); -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// --- preverse --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - -template <> -EIGEN_STRONG_INLINE PacketXcf preverse(const PacketXcf& a) { - // 2 complex floats: swap pairs (0,1) and (2,3) - return PacketXcf(__builtin_shufflevector(a.v, a.v, 2, 3, 0, 1)); -} -template <> -EIGEN_STRONG_INLINE PacketXcd preverse(const PacketXcd& a) { - // 1 complex double: identity - return a; -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE PacketXcf preverse(const PacketXcf& a) { - // 4 complex floats: reverse pairs - return PacketXcf(reinterpret_cast(preverse(reinterpret_cast(a.v)))); -} -template <> -EIGEN_STRONG_INLINE PacketXcd preverse(const PacketXcd& a) { - // 2 complex doubles: swap pairs - return PacketXcd(__builtin_shufflevector(a.v, a.v, 2, 3, 0, 1)); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXcf preverse(const PacketXcf& a) { - return PacketXcf(reinterpret_cast(preverse(reinterpret_cast(a.v)))); -} -template <> -EIGEN_STRONG_INLINE PacketXcd preverse(const PacketXcd& a) { - return PacketXcd(__builtin_shufflevector(a.v, a.v, 6, 7, 4, 5, 2, 3, 0, 1)); -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - // ----------- Binary ops ------------------ #define DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, OP) \ template <> \ @@ -560,8 +300,8 @@ EIGEN_STRONG_INLINE PacketXcd preverse(const PacketXcd& a) { return PACKET_TYPE(pand(pdupreal(t).v, pdupimag(t).v)); \ } -EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(PacketXcf); -EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(PacketXcd); +EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(Packet8cf); +EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(Packet4cd); // Binary ops that are needed on sub-packets for predux and predux_mul. #define EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(PACKET_TYPE) \ @@ -571,17 +311,11 @@ EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(PacketXcd); return pmul_complex(a, b); \ } -EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(PacketXcf); -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32 -EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cf); -#endif -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 +EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet8cf); EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet4cf); -#endif -EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(PacketXcd); -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 +EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cf); +EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet4cd); EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cd); -#endif #define EIGEN_CLANG_PACKET_SCATTER_GATHER(PACKET_TYPE) \ template <> \ @@ -604,8 +338,8 @@ EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cd); return result; \ } -EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXcf); -EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXcd); +EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8cf); +EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet4cd); #undef EIGEN_CLANG_PACKET_SCATTER_GATHER #undef DELEGATE_BINARY_TO_REAL_OP @@ -614,89 +348,46 @@ EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXcd); // ------------ ternary ops ------------- template <> -EIGEN_STRONG_INLINE PacketXcf pselect(const PacketXcf& mask, const PacketXcf& a, const PacketXcf& b) { - return PacketXcf(reinterpret_cast( - pselect(reinterpret_cast(mask.v), reinterpret_cast(a.v), reinterpret_cast(b.v)))); +EIGEN_STRONG_INLINE Packet8cf pselect(const Packet8cf& mask, const Packet8cf& a, const Packet8cf& b) { + return Packet8cf(reinterpret_cast( + pselect(reinterpret_cast(mask.v), reinterpret_cast(a.v), reinterpret_cast(b.v)))); } -// --- zip_in_place for complex --- namespace detail { - -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXcf& p1, PacketXcf& p2) { - PacketXf tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 4, 5); - p2.v = __builtin_shufflevector(p1.v, p2.v, 2, 3, 6, 7); - p1.v = tmp; -} -// PacketXcd at 16 bytes has 1 element, no zip_in_place needed. - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXcf& p1, PacketXcf& p2) { - PacketXf tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 8, 9, 2, 3, 10, 11); - p2.v = __builtin_shufflevector(p1.v, p2.v, 4, 5, 12, 13, 6, 7, 14, 15); - p1.v = tmp; -} -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXcd& p1, PacketXcd& p2) { - PacketXd tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 4, 5); - p2.v = __builtin_shufflevector(p1.v, p2.v, 2, 3, 6, 7); - p1.v = tmp; -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXcf& p1, PacketXcf& p2) { - PacketXf tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23); +EIGEN_ALWAYS_INLINE void zip_in_place(Packet8cf& p1, Packet8cf& p2) { + Packet16f tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23); p2.v = __builtin_shufflevector(p1.v, p2.v, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31); p1.v = tmp; } template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXcd& p1, PacketXcd& p2) { - PacketXd tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 8, 9, 2, 3, 10, 11); +EIGEN_ALWAYS_INLINE void zip_in_place(Packet4cd& p1, Packet4cd& p2) { + Packet8d tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 8, 9, 2, 3, 10, 11); p2.v = __builtin_shufflevector(p1.v, p2.v, 4, 5, 12, 13, 6, 7, 14, 15); p1.v = tmp; } - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - } // namespace detail -// --- ptranspose for complex --- -// PacketXcf: valid block sizes depend on kComplexFloatSize. -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32 -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -#endif -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -#endif -// PacketXcd: valid block sizes depend on kComplexDoubleSize. -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32 -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -#endif -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -#endif -EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PacketXcf, PacketXf) -EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PacketXcd, PacketXd) +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet8cf, Packet16f) +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cd, Packet8d) } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/clang/MathFunctions.h b/Eigen/src/Core/arch/clang/MathFunctions.h index c2afeda8f..706a87051 100644 --- a/Eigen/src/Core/arch/clang/MathFunctions.h +++ b/Eigen/src/Core/arch/clang/MathFunctions.h @@ -18,27 +18,27 @@ namespace Eigen { namespace internal { template <> -EIGEN_STRONG_INLINE PacketXf pfrexp(const PacketXf& a, PacketXf& exponent) { +EIGEN_STRONG_INLINE Packet16f pfrexp(const Packet16f& a, Packet16f& exponent) { return pfrexp_generic(a, exponent); } template <> -EIGEN_STRONG_INLINE PacketXd pfrexp(const PacketXd& a, PacketXd& exponent) { +EIGEN_STRONG_INLINE Packet8d pfrexp(const Packet8d& a, Packet8d& exponent) { return pfrexp_generic(a, exponent); } template <> -EIGEN_STRONG_INLINE PacketXf pldexp(const PacketXf& a, const PacketXf& exponent) { +EIGEN_STRONG_INLINE Packet16f pldexp(const Packet16f& a, const Packet16f& exponent) { return pldexp_generic(a, exponent); } template <> -EIGEN_STRONG_INLINE PacketXd pldexp(const PacketXd& a, const PacketXd& exponent) { +EIGEN_STRONG_INLINE Packet8d pldexp(const Packet8d& a, const Packet8d& exponent) { return pldexp_generic(a, exponent); } -EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PacketXf) -EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PacketXd) +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet16f) +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet8d) } // end namespace internal diff --git a/Eigen/src/Core/arch/clang/PacketMath.h b/Eigen/src/Core/arch/clang/PacketMath.h index 84126944f..4beadfe64 100644 --- a/Eigen/src/Core/arch/clang/PacketMath.h +++ b/Eigen/src/Core/arch/clang/PacketMath.h @@ -24,32 +24,14 @@ template using VectorType = ScalarT __attribute__((ext_vector_type(n), aligned(n * sizeof(ScalarT)))); } // namespace detail -// --- Naming Convention --- -// This backend uses size-independent type aliases so the same code works -// for EIGEN_GENERIC_VECTOR_SIZE_BYTES in {16, 32, 64}: -// -// PacketXf - float vector (4, 8, or 16 elements) -// PacketXd - double vector (2, 4, or 8 elements) -// PacketXi - int32_t vector (4, 8, or 16 elements) -// PacketXl - int64_t vector (2, 4, or 8 elements) -// PacketXcf - complex vector (2, 4, or 8 elements) [in Complex.h] -// PacketXcd - complex vector (1, 2, or 4 elements) [in Complex.h] -// -// The "X" suffix indicates the element count is determined by the macro -// EIGEN_GENERIC_VECTOR_SIZE_BYTES at compile time. Operations that require -// compile-time constant indices (e.g. __builtin_shufflevector) use -// #if EIGEN_GENERIC_VECTOR_SIZE_BYTES == ... blocks. +// --- Primary packet type definitions (fixed at 64 bytes) --- -static_assert(EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 || EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 || - EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64, - "EIGEN_GENERIC_VECTOR_SIZE_BYTES must be 16, 32, or 64"); - -constexpr int kFloatPacketSize = EIGEN_GENERIC_VECTOR_SIZE_BYTES / sizeof(float); -constexpr int kDoublePacketSize = EIGEN_GENERIC_VECTOR_SIZE_BYTES / sizeof(double); -using PacketXf = detail::VectorType; -using PacketXd = detail::VectorType; -using PacketXi = detail::VectorType; -using PacketXl = detail::VectorType; +// TODO(rmlarsen): Generalize to other vector sizes. +static_assert(EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64, "We currently assume the full vector size is 64 bytes"); +using Packet16f = detail::VectorType; +using Packet8d = detail::VectorType; +using Packet16i = detail::VectorType; +using Packet8l = detail::VectorType; // --- packet_traits specializations --- struct generic_float_packet_traits : default_packet_traits { @@ -100,20 +82,20 @@ struct generic_float_packet_traits : default_packet_traits { template <> struct packet_traits : generic_float_packet_traits { - using type = PacketXf; - using half = PacketXf; + using type = Packet16f; + using half = Packet16f; enum { - size = kFloatPacketSize, + size = 16, }; }; template <> struct packet_traits : generic_float_packet_traits { - using type = PacketXd; - using half = PacketXd; + using type = Packet8d; + using half = Packet8d; // Generic double-precision acos/asin are not yet implemented in // GenericPacketMathFunctions.h (only float versions exist). - enum { size = kDoublePacketSize, HasACos = 0, HasASin = 0 }; + enum { size = 8, HasACos = 0, HasASin = 0 }; }; struct generic_integer_packet_traits : default_packet_traits { @@ -149,19 +131,19 @@ struct generic_integer_packet_traits : default_packet_traits { template <> struct packet_traits : generic_integer_packet_traits { - using type = PacketXi; - using half = PacketXi; + using type = Packet16i; + using half = Packet16i; enum { - size = kFloatPacketSize, + size = 16, }; }; template <> struct packet_traits : generic_integer_packet_traits { - using type = PacketXl; - using half = PacketXl; + using type = Packet8l; + using half = Packet8l; enum { - size = kDoublePacketSize, + size = 8, }; }; @@ -174,37 +156,37 @@ struct generic_unpacket_traits : default_unpacket_traits { }; template <> -struct unpacket_traits : generic_unpacket_traits { +struct unpacket_traits : generic_unpacket_traits { using type = float; - using half = PacketXf; - using integer_packet = PacketXi; + using half = Packet16f; + using integer_packet = Packet16i; enum { - size = kFloatPacketSize, + size = 16, }; }; template <> -struct unpacket_traits : generic_unpacket_traits { +struct unpacket_traits : generic_unpacket_traits { using type = double; - using half = PacketXd; - using integer_packet = PacketXl; + using half = Packet8d; + using integer_packet = Packet8l; enum { - size = kDoublePacketSize, + size = 8, }; }; template <> -struct unpacket_traits : generic_unpacket_traits { +struct unpacket_traits : generic_unpacket_traits { using type = int32_t; - using half = PacketXi; + using half = Packet16i; enum { - size = kFloatPacketSize, + size = 16, }; }; template <> -struct unpacket_traits : generic_unpacket_traits { +struct unpacket_traits : generic_unpacket_traits { using type = int64_t; - using half = PacketXl; + using half = Packet8l; enum { - size = kDoublePacketSize, + size = 8, }; }; @@ -283,21 +265,21 @@ EIGEN_STRONG_INLINE void store_vector_aligned(scalar_type_of_vector_t* detail::store_vector_aligned(to, from); \ } -EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PacketXf) -EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PacketXd) -EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PacketXi) -EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PacketXl) +EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16f) +EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8d) +EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16i) +EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8l) #undef EIGEN_CLANG_PACKET_LOAD_STORE_PACKET // --- Broadcast operation --- template <> -EIGEN_STRONG_INLINE PacketXf pset1frombits(uint32_t from) { - return PacketXf(numext::bit_cast(from)); +EIGEN_STRONG_INLINE Packet16f pset1frombits(uint32_t from) { + return Packet16f(numext::bit_cast(from)); } template <> -EIGEN_STRONG_INLINE PacketXd pset1frombits(uint64_t from) { - return PacketXd(numext::bit_cast(from)); +EIGEN_STRONG_INLINE Packet8d pset1frombits(uint64_t from) { + return Packet8d(numext::bit_cast(from)); } #define EIGEN_CLANG_PACKET_SET1(PACKET_TYPE) \ @@ -310,10 +292,10 @@ EIGEN_STRONG_INLINE PacketXd pset1frombits(uint64_t from) { return from[0]; \ } -EIGEN_CLANG_PACKET_SET1(PacketXf) -EIGEN_CLANG_PACKET_SET1(PacketXd) -EIGEN_CLANG_PACKET_SET1(PacketXi) -EIGEN_CLANG_PACKET_SET1(PacketXl) +EIGEN_CLANG_PACKET_SET1(Packet16f) +EIGEN_CLANG_PACKET_SET1(Packet8d) +EIGEN_CLANG_PACKET_SET1(Packet16i) +EIGEN_CLANG_PACKET_SET1(Packet8l) #undef EIGEN_CLANG_PACKET_SET1 // --- Arithmetic operations --- @@ -327,10 +309,10 @@ EIGEN_CLANG_PACKET_SET1(PacketXl) return -a; \ } -EIGEN_CLANG_PACKET_ARITHMETIC(PacketXf) -EIGEN_CLANG_PACKET_ARITHMETIC(PacketXd) -EIGEN_CLANG_PACKET_ARITHMETIC(PacketXi) -EIGEN_CLANG_PACKET_ARITHMETIC(PacketXl) +EIGEN_CLANG_PACKET_ARITHMETIC(Packet16f) +EIGEN_CLANG_PACKET_ARITHMETIC(Packet8d) +EIGEN_CLANG_PACKET_ARITHMETIC(Packet16i) +EIGEN_CLANG_PACKET_ARITHMETIC(Packet8l) #undef EIGEN_CLANG_PACKET_ARITHMETIC // --- Bitwise operations (via casting) --- @@ -339,10 +321,10 @@ namespace detail { // Reinterpret-cast helpers, equivalent to preinterpret<> but defined here // because PacketMath.h is included before TypeCasting.h. -EIGEN_STRONG_INLINE PacketXi preinterpret_float_to_int(const PacketXf& a) { return reinterpret_cast(a); } -EIGEN_STRONG_INLINE PacketXf preinterpret_int_to_float(const PacketXi& a) { return reinterpret_cast(a); } -EIGEN_STRONG_INLINE PacketXl preinterpret_double_to_long(const PacketXd& a) { return reinterpret_cast(a); } -EIGEN_STRONG_INLINE PacketXd preinterpret_long_to_double(const PacketXl& a) { return reinterpret_cast(a); } +EIGEN_STRONG_INLINE Packet16i preinterpret_float_to_int(const Packet16f& a) { return reinterpret_cast(a); } +EIGEN_STRONG_INLINE Packet16f preinterpret_int_to_float(const Packet16i& a) { return reinterpret_cast(a); } +EIGEN_STRONG_INLINE Packet8l preinterpret_double_to_long(const Packet8d& a) { return reinterpret_cast(a); } +EIGEN_STRONG_INLINE Packet8d preinterpret_long_to_double(const Packet8l& a) { return reinterpret_cast(a); } } // namespace detail @@ -386,8 +368,8 @@ EIGEN_STRONG_INLINE PacketXd preinterpret_long_to_double(const PacketXl& a) { re return a << N; \ } -EIGEN_CLANG_PACKET_BITWISE_INT(PacketXi) -EIGEN_CLANG_PACKET_BITWISE_INT(PacketXl) +EIGEN_CLANG_PACKET_BITWISE_INT(Packet16i) +EIGEN_CLANG_PACKET_BITWISE_INT(Packet8l) #undef EIGEN_CLANG_PACKET_BITWISE_INT // Bitwise ops for floating point packets @@ -419,8 +401,8 @@ EIGEN_CLANG_PACKET_BITWISE_INT(PacketXl) return CAST_FROM_INT(CAST_TO_INT(a) & ~CAST_TO_INT(b)); \ } -EIGEN_CLANG_PACKET_BITWISE_FLOAT(PacketXf, detail::preinterpret_float_to_int, detail::preinterpret_int_to_float) -EIGEN_CLANG_PACKET_BITWISE_FLOAT(PacketXd, detail::preinterpret_double_to_long, detail::preinterpret_long_to_double) +EIGEN_CLANG_PACKET_BITWISE_FLOAT(Packet16f, detail::preinterpret_float_to_int, detail::preinterpret_int_to_float) +EIGEN_CLANG_PACKET_BITWISE_FLOAT(Packet8d, detail::preinterpret_double_to_long, detail::preinterpret_long_to_double) #undef EIGEN_CLANG_PACKET_BITWISE_FLOAT // --- Comparison operations --- @@ -446,8 +428,8 @@ EIGEN_CLANG_PACKET_BITWISE_FLOAT(PacketXd, detail::preinterpret_double_to_long, return numext::bit_cast(INT_PACKET_TYPE(!(a >= b))); \ } -EIGEN_CLANG_PACKET_CMP(PacketXf, PacketXi) -EIGEN_CLANG_PACKET_CMP(PacketXd, PacketXl) +EIGEN_CLANG_PACKET_CMP(Packet16f, Packet16i) +EIGEN_CLANG_PACKET_CMP(Packet8d, Packet8l) #undef EIGEN_CLANG_PACKET_CMP // --- Min/Max operations --- @@ -490,10 +472,10 @@ EIGEN_CLANG_PACKET_CMP(PacketXd, PacketXl) return mask != 0 ? a : b; \ } -EIGEN_CLANG_PACKET_ELEMENTWISE(PacketXf) -EIGEN_CLANG_PACKET_ELEMENTWISE(PacketXd) -EIGEN_CLANG_PACKET_ELEMENTWISE(PacketXi) -EIGEN_CLANG_PACKET_ELEMENTWISE(PacketXl) +EIGEN_CLANG_PACKET_ELEMENTWISE(Packet16f) +EIGEN_CLANG_PACKET_ELEMENTWISE(Packet8d) +EIGEN_CLANG_PACKET_ELEMENTWISE(Packet16i) +EIGEN_CLANG_PACKET_ELEMENTWISE(Packet8l) #undef EIGEN_CLANG_PACKET_ELEMENTWISE #endif @@ -528,8 +510,8 @@ EIGEN_CLANG_PACKET_ELEMENTWISE(PacketXl) return __builtin_elementwise_sqrt(a); \ } -EIGEN_CLANG_PACKET_MATH_FLOAT(PacketXf) -EIGEN_CLANG_PACKET_MATH_FLOAT(PacketXd) +EIGEN_CLANG_PACKET_MATH_FLOAT(Packet16f) +EIGEN_CLANG_PACKET_MATH_FLOAT(Packet8d) #undef EIGEN_CLANG_PACKET_MATH_FLOAT #endif @@ -581,8 +563,8 @@ EIGEN_CLANG_PACKET_MATH_FLOAT(PacketXd) } #endif -EIGEN_CLANG_PACKET_MADD(PacketXf) -EIGEN_CLANG_PACKET_MADD(PacketXd) +EIGEN_CLANG_PACKET_MADD(Packet16f) +EIGEN_CLANG_PACKET_MADD(Packet8d) #undef EIGEN_CLANG_PACKET_MADD #define EIGEN_CLANG_PACKET_SCATTER_GATHER(PACKET_TYPE) \ @@ -604,10 +586,10 @@ EIGEN_CLANG_PACKET_MADD(PacketXd) return result; \ } -EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXf) -EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXd) -EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXi) -EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXl) +EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet16f) +EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8d) +EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet16i) +EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8l) #undef EIGEN_CLANG_PACKET_SCATTER_GATHER @@ -615,14 +597,6 @@ EIGEN_CLANG_PACKET_SCATTER_GATHER(PacketXl) #if EIGEN_HAS_BUILTIN(__builtin_shufflevector) namespace detail { template -EIGEN_STRONG_INLINE Packet preverse_impl_2(const Packet& a) { - return __builtin_shufflevector(a, a, 1, 0); -} -template -EIGEN_STRONG_INLINE Packet preverse_impl_4(const Packet& a) { - return __builtin_shufflevector(a, a, 3, 2, 1, 0); -} -template EIGEN_STRONG_INLINE Packet preverse_impl_8(const Packet& a) { return __builtin_shufflevector(a, a, 7, 6, 5, 4, 3, 2, 1, 0); } @@ -632,81 +606,33 @@ EIGEN_STRONG_INLINE Packet preverse_impl_16(const Packet& a) { } } // namespace detail -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 +#define EIGEN_CLANG_PACKET_REVERSE(PACKET_TYPE, SIZE) \ + template <> \ + EIGEN_STRONG_INLINE PACKET_TYPE preverse(const PACKET_TYPE& a) { \ + return detail::preverse_impl_##SIZE(a); \ + } -template <> -EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a) { - return detail::preverse_impl_4(a); -} -template <> -EIGEN_STRONG_INLINE PacketXd preverse(const PacketXd& a) { - return detail::preverse_impl_2(a); -} -template <> -EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a) { - return detail::preverse_impl_4(a); -} -template <> -EIGEN_STRONG_INLINE PacketXl preverse(const PacketXl& a) { - return detail::preverse_impl_2(a); -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a) { - return detail::preverse_impl_8(a); -} -template <> -EIGEN_STRONG_INLINE PacketXd preverse(const PacketXd& a) { - return detail::preverse_impl_4(a); -} -template <> -EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a) { - return detail::preverse_impl_8(a); -} -template <> -EIGEN_STRONG_INLINE PacketXl preverse(const PacketXl& a) { - return detail::preverse_impl_4(a); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a) { - return detail::preverse_impl_16(a); -} -template <> -EIGEN_STRONG_INLINE PacketXd preverse(const PacketXd& a) { - return detail::preverse_impl_8(a); -} -template <> -EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a) { - return detail::preverse_impl_16(a); -} -template <> -EIGEN_STRONG_INLINE PacketXl preverse(const PacketXl& a) { - return detail::preverse_impl_8(a); -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES +EIGEN_CLANG_PACKET_REVERSE(Packet16f, 16) +EIGEN_CLANG_PACKET_REVERSE(Packet8d, 8) +EIGEN_CLANG_PACKET_REVERSE(Packet16i, 16) +EIGEN_CLANG_PACKET_REVERSE(Packet8l, 8) +#undef EIGEN_CLANG_PACKET_REVERSE namespace detail { - template -EIGEN_STRONG_INLINE Packet ploaddup2(const typename unpacket_traits::type* from) { +EIGEN_STRONG_INLINE Packet ploaddup16(const typename unpacket_traits::type* from) { static_assert((unpacket_traits::size) % 2 == 0, "Packet size must be a multiple of 2"); using HalfPacket = HalfPacket; HalfPacket a = load_vector_unaligned(from); - return __builtin_shufflevector(a, a, 0, 0); + return __builtin_shufflevector(a, a, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7); } template -EIGEN_STRONG_INLINE Packet ploaddup4(const typename unpacket_traits::type* from) { - static_assert((unpacket_traits::size) % 2 == 0, "Packet size must be a multiple of 2"); - using HalfPacket = HalfPacket; - HalfPacket a = load_vector_unaligned(from); - return __builtin_shufflevector(a, a, 0, 0, 1, 1); +EIGEN_STRONG_INLINE Packet ploadquad16(const typename unpacket_traits::type* from) { + static_assert((unpacket_traits::size) % 4 == 0, "Packet size must be a multiple of 4"); + using QuarterPacket = QuarterPacket; + QuarterPacket a = load_vector_unaligned(from); + return __builtin_shufflevector(a, a, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3); } template @@ -717,22 +643,6 @@ EIGEN_STRONG_INLINE Packet ploaddup8(const typename unpacket_traits::typ return __builtin_shufflevector(a, a, 0, 0, 1, 1, 2, 2, 3, 3); } -template -EIGEN_STRONG_INLINE Packet ploaddup16(const typename unpacket_traits::type* from) { - static_assert((unpacket_traits::size) % 2 == 0, "Packet size must be a multiple of 2"); - using HalfPacket = HalfPacket; - HalfPacket a = load_vector_unaligned(from); - return __builtin_shufflevector(a, a, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7); -} - -template -EIGEN_STRONG_INLINE Packet ploadquad4(const typename unpacket_traits::type* from) { - static_assert((unpacket_traits::size) % 4 == 0, "Packet size must be a multiple of 4"); - using QuarterPacket = QuarterPacket; - QuarterPacket a = load_vector_unaligned(from); - return __builtin_shufflevector(a, a, 0, 0, 0, 0); -} - template EIGEN_STRONG_INLINE Packet ploadquad8(const typename unpacket_traits::type* from) { static_assert((unpacket_traits::size) % 4 == 0, "Packet size must be a multiple of 4"); @@ -741,241 +651,84 @@ EIGEN_STRONG_INLINE Packet ploadquad8(const typename unpacket_traits::ty return __builtin_shufflevector(a, a, 0, 0, 0, 0, 1, 1, 1, 1); } -template -EIGEN_STRONG_INLINE Packet ploadquad16(const typename unpacket_traits::type* from) { - static_assert((unpacket_traits::size) % 4 == 0, "Packet size must be a multiple of 4"); - using QuarterPacket = QuarterPacket; - QuarterPacket a = load_vector_unaligned(from); - return __builtin_shufflevector(a, a, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3); -} - } // namespace detail -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE PacketXf ploaddup(const float* from) { - return detail::ploaddup4(from); +EIGEN_STRONG_INLINE Packet16f ploaddup(const float* from) { + return detail::ploaddup16(from); } template <> -EIGEN_STRONG_INLINE PacketXd ploaddup(const double* from) { - return detail::ploaddup2(from); +EIGEN_STRONG_INLINE Packet8d ploaddup(const double* from) { + return detail::ploaddup8(from); } template <> -EIGEN_STRONG_INLINE PacketXi ploaddup(const int32_t* from) { - return detail::ploaddup4(from); +EIGEN_STRONG_INLINE Packet16i ploaddup(const int32_t* from) { + return detail::ploaddup16(from); } template <> -EIGEN_STRONG_INLINE PacketXl ploaddup(const int64_t* from) { - return detail::ploaddup2(from); -} -template <> -EIGEN_STRONG_INLINE PacketXf ploadquad(const float* from) { - return detail::ploadquad4(from); -} -template <> -EIGEN_STRONG_INLINE PacketXi ploadquad(const int32_t* from) { - return detail::ploadquad4(from); -} -// No ploadquad for 2-element packets (PacketXd, PacketXl) at 16 bytes. - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE PacketXf ploaddup(const float* from) { - return detail::ploaddup8(from); -} -template <> -EIGEN_STRONG_INLINE PacketXd ploaddup(const double* from) { - return detail::ploaddup4(from); -} -template <> -EIGEN_STRONG_INLINE PacketXi ploaddup(const int32_t* from) { - return detail::ploaddup8(from); -} -template <> -EIGEN_STRONG_INLINE PacketXl ploaddup(const int64_t* from) { - return detail::ploaddup4(from); -} -template <> -EIGEN_STRONG_INLINE PacketXf ploadquad(const float* from) { - return detail::ploadquad8(from); -} -template <> -EIGEN_STRONG_INLINE PacketXd ploadquad(const double* from) { - return detail::ploadquad4(from); -} -template <> -EIGEN_STRONG_INLINE PacketXi ploadquad(const int32_t* from) { - return detail::ploadquad8(from); -} -template <> -EIGEN_STRONG_INLINE PacketXl ploadquad(const int64_t* from) { - return detail::ploadquad4(from); +EIGEN_STRONG_INLINE Packet8l ploaddup(const int64_t* from) { + return detail::ploaddup8(from); } -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - template <> -EIGEN_STRONG_INLINE PacketXf ploaddup(const float* from) { - return detail::ploaddup16(from); +EIGEN_STRONG_INLINE Packet16f ploadquad(const float* from) { + return detail::ploadquad16(from); } template <> -EIGEN_STRONG_INLINE PacketXd ploaddup(const double* from) { - return detail::ploaddup8(from); +EIGEN_STRONG_INLINE Packet8d ploadquad(const double* from) { + return detail::ploadquad8(from); } template <> -EIGEN_STRONG_INLINE PacketXi ploaddup(const int32_t* from) { - return detail::ploaddup16(from); +EIGEN_STRONG_INLINE Packet16i ploadquad(const int32_t* from) { + return detail::ploadquad16(from); } template <> -EIGEN_STRONG_INLINE PacketXl ploaddup(const int64_t* from) { - return detail::ploaddup8(from); -} -template <> -EIGEN_STRONG_INLINE PacketXf ploadquad(const float* from) { - return detail::ploadquad16(from); -} -template <> -EIGEN_STRONG_INLINE PacketXd ploadquad(const double* from) { - return detail::ploadquad8(from); -} -template <> -EIGEN_STRONG_INLINE PacketXi ploadquad(const int32_t* from) { - return detail::ploadquad16(from); -} -template <> -EIGEN_STRONG_INLINE PacketXl ploadquad(const int64_t* from) { - return detail::ploadquad8(from); +EIGEN_STRONG_INLINE Packet8l ploadquad(const int64_t* from) { + return detail::ploadquad8(from); } -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// --- plset --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE PacketXf plset(const float& a) { - return PacketXf{a + 0.0f, a + 1.0f, a + 2.0f, a + 3.0f}; +EIGEN_STRONG_INLINE Packet16f plset(const float& a) { + Packet16f x{a + 0.0f, a + 1.0f, a + 2.0f, a + 3.0f, a + 4.0f, a + 5.0f, a + 6.0f, a + 7.0f, + a + 8.0f, a + 9.0f, a + 10.0f, a + 11.0f, a + 12.0f, a + 13.0f, a + 14.0f, a + 15.0f}; + return x; } template <> -EIGEN_STRONG_INLINE PacketXd plset(const double& a) { - return PacketXd{a + 0.0, a + 1.0}; +EIGEN_STRONG_INLINE Packet8d plset(const double& a) { + return Packet8d{a + 0.0, a + 1.0, a + 2.0, a + 3.0, a + 4.0, a + 5.0, a + 6.0, a + 7.0}; } template <> -EIGEN_STRONG_INLINE PacketXi plset(const int32_t& a) { - return PacketXi{a + 0, a + 1, a + 2, a + 3}; +EIGEN_STRONG_INLINE Packet16i plset(const int32_t& a) { + return Packet16i{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7, + a + 8, a + 9, a + 10, a + 11, a + 12, a + 13, a + 14, a + 15}; } template <> -EIGEN_STRONG_INLINE PacketXl plset(const int64_t& a) { - return PacketXl{a + 0, a + 1}; +EIGEN_STRONG_INLINE Packet8l plset(const int64_t& a) { + return Packet8l{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7}; } -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - template <> -EIGEN_STRONG_INLINE PacketXf plset(const float& a) { - return PacketXf{a + 0.0f, a + 1.0f, a + 2.0f, a + 3.0f, a + 4.0f, a + 5.0f, a + 6.0f, a + 7.0f}; -} -template <> -EIGEN_STRONG_INLINE PacketXd plset(const double& a) { - return PacketXd{a + 0.0, a + 1.0, a + 2.0, a + 3.0}; -} -template <> -EIGEN_STRONG_INLINE PacketXi plset(const int32_t& a) { - return PacketXi{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7}; -} -template <> -EIGEN_STRONG_INLINE PacketXl plset(const int64_t& a) { - return PacketXl{a + 0, a + 1, a + 2, a + 3}; -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXf plset(const float& a) { - return PacketXf{a + 0.0f, a + 1.0f, a + 2.0f, a + 3.0f, a + 4.0f, a + 5.0f, a + 6.0f, a + 7.0f, - a + 8.0f, a + 9.0f, a + 10.0f, a + 11.0f, a + 12.0f, a + 13.0f, a + 14.0f, a + 15.0f}; -} -template <> -EIGEN_STRONG_INLINE PacketXd plset(const double& a) { - return PacketXd{a + 0.0, a + 1.0, a + 2.0, a + 3.0, a + 4.0, a + 5.0, a + 6.0, a + 7.0}; -} -template <> -EIGEN_STRONG_INLINE PacketXi plset(const int32_t& a) { - return PacketXi{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7, - a + 8, a + 9, a + 10, a + 11, a + 12, a + 13, a + 14, a + 15}; -} -template <> -EIGEN_STRONG_INLINE PacketXl plset(const int64_t& a) { - return PacketXl{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7}; -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// --- peven_mask --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - -template <> -EIGEN_STRONG_INLINE PacketXf peven_mask(const PacketXf& /* unused */) { +EIGEN_STRONG_INLINE Packet16f peven_mask(const Packet16f& /* unused */) { float kTrue = numext::bit_cast(int32_t(-1)); float kFalse = 0.0f; - return PacketXf{kTrue, kFalse, kTrue, kFalse}; + return Packet16f{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, + kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse}; } + template <> -EIGEN_STRONG_INLINE PacketXd peven_mask(const PacketXd& /* unused */) { +EIGEN_STRONG_INLINE Packet8d peven_mask(const Packet8d& /* unused */) { double kTrue = numext::bit_cast(int64_t(-1l)); double kFalse = 0.0; - return PacketXd{kTrue, kFalse}; + return Packet8d{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse}; } -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE PacketXf peven_mask(const PacketXf& /* unused */) { - float kTrue = numext::bit_cast(int32_t(-1)); - float kFalse = 0.0f; - return PacketXf{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse}; -} -template <> -EIGEN_STRONG_INLINE PacketXd peven_mask(const PacketXd& /* unused */) { - double kTrue = numext::bit_cast(int64_t(-1l)); - double kFalse = 0.0; - return PacketXd{kTrue, kFalse, kTrue, kFalse}; -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE PacketXf peven_mask(const PacketXf& /* unused */) { - float kTrue = numext::bit_cast(int32_t(-1)); - float kFalse = 0.0f; - return PacketXf{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, - kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse}; -} -template <> -EIGEN_STRONG_INLINE PacketXd peven_mask(const PacketXd& /* unused */) { - double kTrue = numext::bit_cast(int64_t(-1l)); - double kFalse = 0.0; - return PacketXd{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse}; -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - // Helpers for ptranspose. namespace detail { template -EIGEN_ALWAYS_INLINE void zip_in_place2(Packet& p1, Packet& p2) { - Packet tmp = __builtin_shufflevector(p1, p2, 0, 2); - p2 = __builtin_shufflevector(p1, p2, 1, 3); - p1 = tmp; -} - -template -EIGEN_ALWAYS_INLINE void zip_in_place4(Packet& p1, Packet& p2) { - Packet tmp = __builtin_shufflevector(p1, p2, 0, 4, 1, 5); - p2 = __builtin_shufflevector(p1, p2, 2, 6, 3, 7); +EIGEN_ALWAYS_INLINE void zip_in_place16(Packet& p1, Packet& p2) { + Packet tmp = __builtin_shufflevector(p1, p2, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + p2 = __builtin_shufflevector(p1, p2, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); p1 = tmp; } @@ -986,68 +739,28 @@ EIGEN_ALWAYS_INLINE void zip_in_place8(Packet& p1, Packet& p2) { p1 = tmp; } -template -EIGEN_ALWAYS_INLINE void zip_in_place16(Packet& p1, Packet& p2) { - Packet tmp = __builtin_shufflevector(p1, p2, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - p2 = __builtin_shufflevector(p1, p2, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - p1 = tmp; -} - template void zip_in_place(Packet& p1, Packet& p2); -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXf& p1, PacketXf& p2) { - zip_in_place4(p1, p2); -} -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXd& p1, PacketXd& p2) { - zip_in_place2(p1, p2); -} -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXi& p1, PacketXi& p2) { - zip_in_place4(p1, p2); -} -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXl& p1, PacketXl& p2) { - zip_in_place2(p1, p2); -} -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXf& p1, PacketXf& p2) { - zip_in_place8(p1, p2); -} -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXd& p1, PacketXd& p2) { - zip_in_place4(p1, p2); -} -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXi& p1, PacketXi& p2) { - zip_in_place8(p1, p2); -} -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXl& p1, PacketXl& p2) { - zip_in_place4(p1, p2); -} -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 -template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXf& p1, PacketXf& p2) { +EIGEN_ALWAYS_INLINE void zip_in_place(Packet16f& p1, Packet16f& p2) { zip_in_place16(p1, p2); } + template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXd& p1, PacketXd& p2) { +EIGEN_ALWAYS_INLINE void zip_in_place(Packet8d& p1, Packet8d& p2) { zip_in_place8(p1, p2); } + template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXi& p1, PacketXi& p2) { +EIGEN_ALWAYS_INLINE void zip_in_place(Packet16i& p1, Packet16i& p2) { zip_in_place16(p1, p2); } + template <> -EIGEN_ALWAYS_INLINE void zip_in_place(PacketXl& p1, PacketXl& p2) { +EIGEN_ALWAYS_INLINE void zip_in_place(Packet8l& p1, Packet8l& p2) { zip_in_place8(p1, p2); } -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES template EIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock& kernel) { @@ -1099,68 +812,61 @@ EIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock& kernel) { } // namespace detail -// ptranspose overloads: only emit valid block sizes per vector size. -// At 16 bytes: float has 4 elems, double has 2 elems. -// At 32 bytes: float has 8 elems, double has 4 elems. -// At 64 bytes: float has 16 elems, double has 8 elems. - -// All sizes support PacketBlock and PacketBlock. -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { - detail::ptranspose_impl(kernel); -} -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -// All sizes support PacketBlock. -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -// All sizes support PacketBlock and PacketBlock. -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { - detail::ptranspose_impl(kernel); -} -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -// All sizes support PacketBlock. -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 32 -// 32+ bytes: float has 8+ elems, double has 4+ elems. -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { - detail::ptranspose_impl(kernel); -} -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { - detail::ptranspose_impl(kernel); -} -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { - detail::ptranspose_impl(kernel); -} -#endif -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES >= 64 -// 64 bytes: float has 16 elems, double has 8 elems. -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { + +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { + +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { + +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { + detail::ptranspose_impl(kernel); +} + +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { + detail::ptranspose_impl(kernel); +} + +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { + detail::ptranspose_impl(kernel); +} + +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { + detail::ptranspose_impl(kernel); +} + +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { + detail::ptranspose_impl(kernel); +} + +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { detail::ptranspose_impl(kernel); } -#endif #endif } // end namespace internal diff --git a/Eigen/src/Core/arch/clang/Reductions.h b/Eigen/src/Core/arch/clang/Reductions.h index 37fc1617f..defedf98d 100644 --- a/Eigen/src/Core/arch/clang/Reductions.h +++ b/Eigen/src/Core/arch/clang/Reductions.h @@ -33,10 +33,10 @@ namespace internal { return __builtin_reduce_or(a != 0) != 0; \ } -EIGEN_CLANG_PACKET_REDUX_MINMAX(PacketXf) -EIGEN_CLANG_PACKET_REDUX_MINMAX(PacketXd) -EIGEN_CLANG_PACKET_REDUX_MINMAX(PacketXi) -EIGEN_CLANG_PACKET_REDUX_MINMAX(PacketXl) +EIGEN_CLANG_PACKET_REDUX_MINMAX(Packet16f) +EIGEN_CLANG_PACKET_REDUX_MINMAX(Packet8d) +EIGEN_CLANG_PACKET_REDUX_MINMAX(Packet16i) +EIGEN_CLANG_PACKET_REDUX_MINMAX(Packet8l) #undef EIGEN_CLANG_PACKET_REDUX_MINMAX #endif @@ -52,38 +52,13 @@ EIGEN_CLANG_PACKET_REDUX_MINMAX(PacketXl) } // __builtin_reduce_{mul,add} are only defined for integer types. -EIGEN_CLANG_PACKET_REDUX_INT(PacketXi) -EIGEN_CLANG_PACKET_REDUX_INT(PacketXl) +EIGEN_CLANG_PACKET_REDUX_INT(Packet16i) +EIGEN_CLANG_PACKET_REDUX_INT(Packet8l) #undef EIGEN_CLANG_PACKET_REDUX_INT #endif #if EIGEN_HAS_BUILTIN(__builtin_shufflevector) namespace detail { - -// Reduction helpers for different vector sizes. -// Each returns a pair of (even-sum, odd-sum) or (even-product, odd-product). - -template -EIGEN_STRONG_INLINE std::pair, scalar_type_of_vector_t> ReduceAdd2( - const VectorT& a) { - return {a[0], a[1]}; -} - -template -EIGEN_STRONG_INLINE std::pair, scalar_type_of_vector_t> ReduceAdd4( - const VectorT& a) { - const auto t1 = __builtin_shufflevector(a, a, 0, 1) + __builtin_shufflevector(a, a, 2, 3); - return {t1[0], t1[1]}; -} - -template -EIGEN_STRONG_INLINE std::pair, scalar_type_of_vector_t> ReduceAdd8( - const VectorT& a) { - const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) + __builtin_shufflevector(a, a, 4, 5, 6, 7); - const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) + __builtin_shufflevector(t1, t1, 2, 3); - return {t2[0], t2[1]}; -} - template EIGEN_STRONG_INLINE std::pair, scalar_type_of_vector_t> ReduceAdd16( const VectorT& a) { @@ -95,23 +70,10 @@ EIGEN_STRONG_INLINE std::pair, scalar_type_of_v } template -EIGEN_STRONG_INLINE std::pair, scalar_type_of_vector_t> ReduceMul2( +EIGEN_STRONG_INLINE std::pair, scalar_type_of_vector_t> ReduceAdd8( const VectorT& a) { - return {a[0], a[1]}; -} - -template -EIGEN_STRONG_INLINE std::pair, scalar_type_of_vector_t> ReduceMul4( - const VectorT& a) { - const auto t1 = __builtin_shufflevector(a, a, 0, 1) * __builtin_shufflevector(a, a, 2, 3); - return {t1[0], t1[1]}; -} - -template -EIGEN_STRONG_INLINE std::pair, scalar_type_of_vector_t> ReduceMul8( - const VectorT& a) { - const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) * __builtin_shufflevector(a, a, 4, 5, 6, 7); - const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) * __builtin_shufflevector(t1, t1, 2, 3); + const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) + __builtin_shufflevector(a, a, 4, 5, 6, 7); + const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) + __builtin_shufflevector(t1, t1, 2, 3); return {t2[0], t2[1]}; } @@ -124,188 +86,57 @@ EIGEN_STRONG_INLINE std::pair, scalar_type_of_v const auto t3 = __builtin_shufflevector(t2, t2, 0, 1) * __builtin_shufflevector(t2, t2, 2, 3); return {t3[0], t3[1]}; } + +template +EIGEN_STRONG_INLINE std::pair, scalar_type_of_vector_t> ReduceMul8( + const VectorT& a) { + const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) * __builtin_shufflevector(a, a, 4, 5, 6, 7); + const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) * __builtin_shufflevector(t1, t1, 2, 3); + return {t2[0], t2[1]}; +} } // namespace detail -// --- predux and predux_mul for float --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE float predux(const PacketXf& a) { - float even, odd; - std::tie(even, odd) = detail::ReduceAdd4(a); - return even + odd; -} -template <> -EIGEN_STRONG_INLINE float predux_mul(const PacketXf& a) { - float even, odd; - std::tie(even, odd) = detail::ReduceMul4(a); - return even * odd; -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE float predux(const PacketXf& a) { - float even, odd; - std::tie(even, odd) = detail::ReduceAdd8(a); - return even + odd; -} -template <> -EIGEN_STRONG_INLINE float predux_mul(const PacketXf& a) { - float even, odd; - std::tie(even, odd) = detail::ReduceMul8(a); - return even * odd; -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE float predux(const PacketXf& a) { +EIGEN_STRONG_INLINE float predux(const Packet16f& a) { float even, odd; std::tie(even, odd) = detail::ReduceAdd16(a); return even + odd; } template <> -EIGEN_STRONG_INLINE float predux_mul(const PacketXf& a) { - float even, odd; - std::tie(even, odd) = detail::ReduceMul16(a); - return even * odd; -} - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// --- predux and predux_mul for double --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - -template <> -EIGEN_STRONG_INLINE double predux(const PacketXd& a) { - double even, odd; - std::tie(even, odd) = detail::ReduceAdd2(a); - return even + odd; -} -template <> -EIGEN_STRONG_INLINE double predux_mul(const PacketXd& a) { - double even, odd; - std::tie(even, odd) = detail::ReduceMul2(a); - return even * odd; -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE double predux(const PacketXd& a) { - double even, odd; - std::tie(even, odd) = detail::ReduceAdd4(a); - return even + odd; -} -template <> -EIGEN_STRONG_INLINE double predux_mul(const PacketXd& a) { - double even, odd; - std::tie(even, odd) = detail::ReduceMul4(a); - return even * odd; -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE double predux(const PacketXd& a) { +EIGEN_STRONG_INLINE double predux(const Packet8d& a) { double even, odd; std::tie(even, odd) = detail::ReduceAdd8(a); return even + odd; } template <> -EIGEN_STRONG_INLINE double predux_mul(const PacketXd& a) { +EIGEN_STRONG_INLINE float predux_mul(const Packet16f& a) { + float even, odd; + std::tie(even, odd) = detail::ReduceMul16(a); + return even * odd; +} +template <> +EIGEN_STRONG_INLINE double predux_mul(const Packet8d& a) { double even, odd; std::tie(even, odd) = detail::ReduceMul8(a); return even * odd; } -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// --- predux for complex --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE std::complex predux(const PacketXcf& a) { - float re, im; - std::tie(re, im) = detail::ReduceAdd4(a.v); - return std::complex(re, im); -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE std::complex predux(const PacketXcf& a) { - float re, im; - std::tie(re, im) = detail::ReduceAdd8(a.v); - return std::complex(re, im); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE std::complex predux(const PacketXcf& a) { +EIGEN_STRONG_INLINE std::complex predux(const Packet8cf& a) { float re, im; std::tie(re, im) = detail::ReduceAdd16(a.v); return std::complex(re, im); } -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// --- predux for complex --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE std::complex predux(const PacketXcd& a) { - // 1 complex double: just return it - return a[0]; -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE std::complex predux(const PacketXcd& a) { - double re, im; - std::tie(re, im) = detail::ReduceAdd4(a.v); - return std::complex(re, im); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE std::complex predux(const PacketXcd& a) { +EIGEN_STRONG_INLINE std::complex predux(const Packet4cd& a) { double re, im; std::tie(re, im) = detail::ReduceAdd8(a.v); return std::complex(re, im); } -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// --- predux_mul for complex --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE std::complex predux_mul(const PacketXcf& a) { - // 2 complex floats: just multiply them - return a[0] * a[1]; -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE std::complex predux_mul(const PacketXcf& a) { - // 4 complex floats: split into 2+2, multiply, then scalar multiply - const Packet2cf lower2 = Packet2cf(__builtin_shufflevector(a.v, a.v, 0, 1, 2, 3)); - const Packet2cf upper2 = Packet2cf(__builtin_shufflevector(a.v, a.v, 4, 5, 6, 7)); - const Packet2cf prod2 = pmul(lower2, upper2); - return prod2[0] * prod2[1]; -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE std::complex predux_mul(const PacketXcf& a) { - // 8 complex floats: 8->4->2->scalar +EIGEN_STRONG_INLINE std::complex predux_mul(const Packet8cf& a) { const Packet4cf lower4 = Packet4cf(__builtin_shufflevector(a.v, a.v, 0, 1, 2, 3, 4, 5, 6, 7)); const Packet4cf upper4 = Packet4cf(__builtin_shufflevector(a.v, a.v, 8, 9, 10, 11, 12, 13, 14, 15)); const Packet4cf prod4 = pmul(lower4, upper4); @@ -315,38 +146,14 @@ EIGEN_STRONG_INLINE std::complex predux_mul(const PacketXcf& a return prod2[0] * prod2[1]; } -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - -// --- predux_mul for complex --- -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - template <> -EIGEN_STRONG_INLINE std::complex predux_mul(const PacketXcd& a) { - // 1 complex double: just return it - return a[0]; -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -template <> -EIGEN_STRONG_INLINE std::complex predux_mul(const PacketXcd& a) { - // 2 complex doubles: just multiply them - return a[0] * a[1]; -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - -template <> -EIGEN_STRONG_INLINE std::complex predux_mul(const PacketXcd& a) { - // 4 complex doubles: split into 2+2, multiply, then scalar multiply +EIGEN_STRONG_INLINE std::complex predux_mul(const Packet4cd& a) { const Packet2cd lower2 = Packet2cd(__builtin_shufflevector(a.v, a.v, 0, 1, 2, 3)); const Packet2cd upper2 = Packet2cd(__builtin_shufflevector(a.v, a.v, 4, 5, 6, 7)); const Packet2cd prod2 = pmul(lower2, upper2); return prod2[0] * prod2[1]; } -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES - #endif } // end namespace internal diff --git a/Eigen/src/Core/arch/clang/TypeCasting.h b/Eigen/src/Core/arch/clang/TypeCasting.h index 75281b2a2..87ac9ea48 100644 --- a/Eigen/src/Core/arch/clang/TypeCasting.h +++ b/Eigen/src/Core/arch/clang/TypeCasting.h @@ -20,140 +20,56 @@ namespace internal { // preinterpret //============================================================================== template <> -EIGEN_STRONG_INLINE PacketXf preinterpret(const PacketXi& a) { - return reinterpret_cast(a); +EIGEN_STRONG_INLINE Packet16f preinterpret(const Packet16i& a) { + return reinterpret_cast(a); } template <> -EIGEN_STRONG_INLINE PacketXi preinterpret(const PacketXf& a) { - return reinterpret_cast(a); +EIGEN_STRONG_INLINE Packet16i preinterpret(const Packet16f& a) { + return reinterpret_cast(a); } template <> -EIGEN_STRONG_INLINE PacketXd preinterpret(const PacketXl& a) { - return reinterpret_cast(a); +EIGEN_STRONG_INLINE Packet8d preinterpret(const Packet8l& a) { + return reinterpret_cast(a); } template <> -EIGEN_STRONG_INLINE PacketXl preinterpret(const PacketXd& a) { - return reinterpret_cast(a); +EIGEN_STRONG_INLINE Packet8l preinterpret(const Packet8d& a) { + return reinterpret_cast(a); } //============================================================================== // pcast //============================================================================== #if EIGEN_HAS_BUILTIN(__builtin_convertvector) -// Float-to-int conversions: __builtin_convertvector has UB for NaN/inf/ -// out-of-range inputs. Replace NaN with 0 before converting so that -// pldexp_fast (which may pass NaN exponents) doesn't trigger UB. template <> -EIGEN_STRONG_INLINE PacketXi pcast(const PacketXf& a) { - const PacketXf safe = a == a ? a : PacketXf(0); - return __builtin_convertvector(safe, PacketXi); +EIGEN_STRONG_INLINE Packet16i pcast(const Packet16f& a) { + return __builtin_convertvector(a, Packet16i); } template <> -EIGEN_STRONG_INLINE PacketXf pcast(const PacketXi& a) { - return __builtin_convertvector(a, PacketXf); +EIGEN_STRONG_INLINE Packet16f pcast(const Packet16i& a) { + return __builtin_convertvector(a, Packet16f); } template <> -EIGEN_STRONG_INLINE PacketXl pcast(const PacketXd& a) { - const PacketXd safe = a == a ? a : PacketXd(0); - return __builtin_convertvector(safe, PacketXl); +EIGEN_STRONG_INLINE Packet8l pcast(const Packet8d& a) { + return __builtin_convertvector(a, Packet8l); } template <> -EIGEN_STRONG_INLINE PacketXd pcast(const PacketXl& a) { - return __builtin_convertvector(a, PacketXd); +EIGEN_STRONG_INLINE Packet8d pcast(const Packet8l& a) { + return __builtin_convertvector(a, Packet8d); } -// float -> double: converts lower half of floats to doubles -// double -> float: converts two PacketXd to one PacketXf -// int32 -> int64: converts lower half of int32s to int64s -// int64 -> int32: converts two PacketXl to one PacketXi - -#if EIGEN_GENERIC_VECTOR_SIZE_BYTES == 16 - -// float -> double: converts lower 2 floats to 2 doubles -template <> -EIGEN_STRONG_INLINE PacketXd pcast(const PacketXf& a) { - using HalfFloat = detail::VectorType; - HalfFloat lo = __builtin_shufflevector(a, a, 0, 1); - return __builtin_convertvector(lo, PacketXd); -} - -// double -> float: converts two PacketXd (2 doubles each) to one PacketXf (4 floats) -template <> -EIGEN_STRONG_INLINE PacketXf pcast(const PacketXd& a, const PacketXd& b) { - using HalfFloat = detail::VectorType; - HalfFloat lo = __builtin_convertvector(a, HalfFloat); - HalfFloat hi = __builtin_convertvector(b, HalfFloat); - return __builtin_shufflevector(lo, hi, 0, 1, 2, 3); -} - -// int32 -> int64: converts lower 2 int32s to 2 int64s -template <> -EIGEN_STRONG_INLINE PacketXl pcast(const PacketXi& a) { - using HalfInt = detail::VectorType; - HalfInt lo = __builtin_shufflevector(a, a, 0, 1); - return __builtin_convertvector(lo, PacketXl); -} - -// int64 -> int32: converts two PacketXl (2 int64s each) to one PacketXi (4 int32s) -template <> -EIGEN_STRONG_INLINE PacketXi pcast(const PacketXl& a, const PacketXl& b) { - using HalfInt = detail::VectorType; - HalfInt lo = __builtin_convertvector(a, HalfInt); - HalfInt hi = __builtin_convertvector(b, HalfInt); - return __builtin_shufflevector(lo, hi, 0, 1, 2, 3); -} - -#elif EIGEN_GENERIC_VECTOR_SIZE_BYTES == 32 - -// float -> double: converts lower 4 floats to 4 doubles -template <> -EIGEN_STRONG_INLINE PacketXd pcast(const PacketXf& a) { - using HalfFloat = detail::VectorType; - HalfFloat lo = __builtin_shufflevector(a, a, 0, 1, 2, 3); - return __builtin_convertvector(lo, PacketXd); -} - -// double -> float: converts two PacketXd (4 doubles each) to one PacketXf (8 floats) -template <> -EIGEN_STRONG_INLINE PacketXf pcast(const PacketXd& a, const PacketXd& b) { - using HalfFloat = detail::VectorType; - HalfFloat lo = __builtin_convertvector(a, HalfFloat); - HalfFloat hi = __builtin_convertvector(b, HalfFloat); - return __builtin_shufflevector(lo, hi, 0, 1, 2, 3, 4, 5, 6, 7); -} - -// int32 -> int64: converts lower 4 int32s to 4 int64s -template <> -EIGEN_STRONG_INLINE PacketXl pcast(const PacketXi& a) { - using HalfInt = detail::VectorType; - HalfInt lo = __builtin_shufflevector(a, a, 0, 1, 2, 3); - return __builtin_convertvector(lo, PacketXl); -} - -// int64 -> int32: converts two PacketXl (4 int64s each) to one PacketXi (8 int32s) -template <> -EIGEN_STRONG_INLINE PacketXi pcast(const PacketXl& a, const PacketXl& b) { - using HalfInt = detail::VectorType; - HalfInt lo = __builtin_convertvector(a, HalfInt); - HalfInt hi = __builtin_convertvector(b, HalfInt); - return __builtin_shufflevector(lo, hi, 0, 1, 2, 3, 4, 5, 6, 7); -} - -#else // EIGEN_GENERIC_VECTOR_SIZE_BYTES == 64 - // float -> double: converts lower 8 floats to 8 doubles template <> -EIGEN_STRONG_INLINE PacketXd pcast(const PacketXf& a) { +EIGEN_STRONG_INLINE Packet8d pcast(const Packet16f& a) { using HalfFloat = detail::VectorType; HalfFloat lo = __builtin_shufflevector(a, a, 0, 1, 2, 3, 4, 5, 6, 7); - return __builtin_convertvector(lo, PacketXd); + return __builtin_convertvector(lo, Packet8d); } -// double -> float: converts two PacketXd to one PacketXf +// double -> float: converts two Packet8d to one Packet16f template <> -EIGEN_STRONG_INLINE PacketXf pcast(const PacketXd& a, const PacketXd& b) { +EIGEN_STRONG_INLINE Packet16f pcast(const Packet8d& a, const Packet8d& b) { using HalfFloat = detail::VectorType; HalfFloat lo = __builtin_convertvector(a, HalfFloat); HalfFloat hi = __builtin_convertvector(b, HalfFloat); @@ -162,22 +78,20 @@ EIGEN_STRONG_INLINE PacketXf pcast(const PacketXd& a, const // int32 -> int64: converts lower 8 int32s to 8 int64s template <> -EIGEN_STRONG_INLINE PacketXl pcast(const PacketXi& a) { +EIGEN_STRONG_INLINE Packet8l pcast(const Packet16i& a) { using HalfInt = detail::VectorType; HalfInt lo = __builtin_shufflevector(a, a, 0, 1, 2, 3, 4, 5, 6, 7); - return __builtin_convertvector(lo, PacketXl); + return __builtin_convertvector(lo, Packet8l); } -// int64 -> int32: converts two PacketXl to one PacketXi +// int64 -> int32: converts two Packet8l to one Packet16i template <> -EIGEN_STRONG_INLINE PacketXi pcast(const PacketXl& a, const PacketXl& b) { +EIGEN_STRONG_INLINE Packet16i pcast(const Packet8l& a, const Packet8l& b) { using HalfInt = detail::VectorType; HalfInt lo = __builtin_convertvector(a, HalfInt); HalfInt hi = __builtin_convertvector(b, HalfInt); return __builtin_shufflevector(lo, hi, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } - -#endif // EIGEN_GENERIC_VECTOR_SIZE_BYTES #endif } // end namespace internal diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6c183dbac..d167710bf 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -179,7 +179,6 @@ ei_add_test(numext) ei_add_test(sizeof) ei_add_test(dynalloc) ei_add_test(nomalloc) -ei_add_test(noresize) ei_add_test(first_aligned) ei_add_test(type_alias) ei_add_test(nullary) @@ -187,17 +186,6 @@ ei_add_test(mixingtypes) ei_add_test(float_conversion) ei_add_test(io) ei_add_test(packetmath "-DEIGEN_FAST_MATH=1") -# Generic clang vector backend tests for different vector sizes. -include(CheckCXXSourceCompiles) -check_cxx_source_compiles(" - typedef float v4sf __attribute__((ext_vector_type(4))); - int main() { return __builtin_vectorelements(v4sf{}); } -" COMPILER_SUPPORTS_VECTOR_EXTENSIONS) -if(COMPILER_SUPPORTS_VECTOR_EXTENSIONS) - ei_add_test(packetmath_generic_16 "-DEIGEN_FAST_MATH=1") - ei_add_test(packetmath_generic_32 "-DEIGEN_FAST_MATH=1") - ei_add_test(packetmath_generic_64 "-DEIGEN_FAST_MATH=1") -endif() ei_add_test(packet_segment) ei_add_test(vectorization_logic) ei_add_test(basicstuff) diff --git a/test/evaluators.cpp b/test/evaluators.cpp index 4b55ebcaf..5a4ab3764 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -33,8 +33,6 @@ EIGEN_STRONG_INLINE DstXprType& copy_using_evaluator(const PlainObjectBase -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// Must be defined before including any Eigen headers. -#define EIGEN_NO_AUTOMATIC_RESIZING - -#include "main.h" - -// Helper to create a random matrix respecting compile-time fixed dimensions. -template -MatrixType random_matrix() { - enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime }; - Index rows = (RowsAtCompileTime == Dynamic) ? internal::random(1, 10) : Index(RowsAtCompileTime); - Index cols = (ColsAtCompileTime == Dynamic) ? internal::random(1, 10) : Index(ColsAtCompileTime); - return MatrixType::Random(rows, cols); -} - -template -void noresize_assign_to_empty() { - MatrixType src = random_matrix(); - - // Assigning to a default-constructed (empty) destination should work. - MatrixType dst; - dst = src; - VERIFY_IS_EQUAL(dst.rows(), src.rows()); - VERIFY_IS_EQUAL(dst.cols(), src.cols()); - VERIFY_IS_APPROX(dst, src); -} - -template -void noresize_assign_expression_to_empty() { - MatrixType a = random_matrix(); - MatrixType b(a.rows(), a.cols()); - b.setRandom(); - - // Assigning an expression to an empty destination should work. - MatrixType dst; - dst = a + b; - VERIFY_IS_EQUAL(dst.rows(), a.rows()); - VERIFY_IS_EQUAL(dst.cols(), a.cols()); - VERIFY_IS_APPROX(dst, a + b); -} - -template -void noresize_construct_from_expression() { - MatrixType a = random_matrix(); - - // Construction from an expression should work. - MatrixType dst = a * 2; - VERIFY_IS_EQUAL(dst.rows(), a.rows()); - VERIFY_IS_EQUAL(dst.cols(), a.cols()); - VERIFY_IS_APPROX(dst, a * 2); -} - -template -void noresize_col_access() { - MatrixType src = random_matrix(); - - // Assigning to empty, then accessing columns should work. - MatrixType dst; - dst = src; - for (Index j = 0; j < src.cols(); ++j) { - VERIFY_IS_APPROX(dst.col(j), src.col(j)); - } -} - -template -void noresize_size_mismatch() { - enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime }; - Index rows = (RowsAtCompileTime == Dynamic) ? internal::random(2, 10) : Index(RowsAtCompileTime); - Index cols = (ColsAtCompileTime == Dynamic) ? internal::random(2, 10) : Index(ColsAtCompileTime); - MatrixType src = MatrixType::Random(rows, cols); - // Create a destination with at least one mismatched dynamic dimension. - Index dst_rows = (RowsAtCompileTime == Dynamic) ? rows + 1 : rows; - Index dst_cols = (ColsAtCompileTime == Dynamic) ? cols + 1 : cols; - MatrixType dst = MatrixType::Random(dst_rows, dst_cols); - - // Assigning to a non-empty destination with different size should assert. - VERIFY_RAISES_ASSERT(dst = src); -} - -EIGEN_DECLARE_TEST(noresize) { - CALL_SUBTEST_1(noresize_assign_to_empty()); - CALL_SUBTEST_1(noresize_assign_to_empty()); - CALL_SUBTEST_1(noresize_assign_to_empty()); - CALL_SUBTEST_1(noresize_assign_to_empty()); - CALL_SUBTEST_2(noresize_assign_to_empty()); - CALL_SUBTEST_2(noresize_assign_to_empty()); - CALL_SUBTEST_3(noresize_assign_to_empty()); - CALL_SUBTEST_3(noresize_assign_to_empty()); - - CALL_SUBTEST_4(noresize_assign_expression_to_empty()); - CALL_SUBTEST_4(noresize_assign_expression_to_empty()); - - CALL_SUBTEST_5(noresize_construct_from_expression()); - CALL_SUBTEST_5(noresize_construct_from_expression()); - - CALL_SUBTEST_6(noresize_col_access()); - CALL_SUBTEST_6(noresize_col_access()); - - CALL_SUBTEST_7(noresize_size_mismatch()); - CALL_SUBTEST_7(noresize_size_mismatch()); - CALL_SUBTEST_7(noresize_size_mismatch()); -} diff --git a/test/packetmath_generic_16.cpp b/test/packetmath_generic_16.cpp deleted file mode 100644 index 612a75c65..000000000 --- a/test/packetmath_generic_16.cpp +++ /dev/null @@ -1,4 +0,0 @@ -// Force the generic clang vector backend with 16-byte vectors. -#define EIGEN_VECTORIZE_GENERIC 1 -#define EIGEN_GENERIC_VECTOR_SIZE_BYTES 16 -#include "packetmath.cpp" diff --git a/test/packetmath_generic_32.cpp b/test/packetmath_generic_32.cpp deleted file mode 100644 index 9816f9fa5..000000000 --- a/test/packetmath_generic_32.cpp +++ /dev/null @@ -1,4 +0,0 @@ -// Force the generic clang vector backend with 32-byte vectors. -#define EIGEN_VECTORIZE_GENERIC 1 -#define EIGEN_GENERIC_VECTOR_SIZE_BYTES 32 -#include "packetmath.cpp" diff --git a/test/packetmath_generic_64.cpp b/test/packetmath_generic_64.cpp deleted file mode 100644 index 69575449e..000000000 --- a/test/packetmath_generic_64.cpp +++ /dev/null @@ -1,4 +0,0 @@ -// Force the generic clang vector backend with 64-byte vectors. -#define EIGEN_VECTORIZE_GENERIC 1 -#define EIGEN_GENERIC_VECTOR_SIZE_BYTES 64 -#include "packetmath.cpp"