From b6fcddccfcf0d627a7309bee20d92363548ac07c Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen <4643818-rmlarsen1@users.noreply.gitlab.com> Date: Mon, 3 Nov 2025 23:27:50 +0000 Subject: [PATCH] Get rid of pblend packet op. There was only a single code path left in TensorEvaluator using pblend. We can replace that with a call to the more general TernarySelectOp and get rid of pblend entirely from Core. Closes #2998 See merge request libeigen/eigen!2056 Co-authored-by: Rasmus Munk Larsen --- Eigen/src/Core/GenericPacketMath.h | 16 ---- Eigen/src/Core/arch/AVX/PacketMath.h | 30 ------ Eigen/src/Core/arch/AVX512/PacketMath.h | 21 ---- Eigen/src/Core/arch/AltiVec/Complex.h | 14 --- Eigen/src/Core/arch/AltiVec/PacketMath.h | 84 ---------------- Eigen/src/Core/arch/LSX/PacketMath.h | 10 -- Eigen/src/Core/arch/MSA/Complex.h | 7 -- Eigen/src/Core/arch/MSA/PacketMath.h | 27 ------ Eigen/src/Core/arch/NEON/PacketMath.h | 12 --- Eigen/src/Core/arch/SSE/Complex.h | 8 -- Eigen/src/Core/arch/SSE/PacketMath.h | 38 -------- Eigen/src/Core/arch/SVE/PacketMath.h | 2 - Eigen/src/Core/arch/SYCL/PacketMath.h | 25 ----- Eigen/src/Core/arch/ZVector/Complex.h | 17 ---- Eigen/src/Core/arch/ZVector/PacketMath.h | 96 ++++--------------- test/packetmath.cpp | 16 ---- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 15 +-- 17 files changed, 26 insertions(+), 412 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 64e11231e..5c8bbce6d 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -63,7 +63,6 @@ struct default_packet_traits { HasArg = 0, HasAbsDiff = 0, - HasBlend = 0, // This flag is used to indicate whether packet comparison is supported. // pcmp_eq and pcmp_lt should be defined for it to be true. HasCmp = 0, @@ -1482,21 +1481,6 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& /*kernel*/) { // Nothing to do in the scalar case, i.e. a 1x1 matrix. } -/*************************************************************************** - * Selector, i.e. vector of N boolean values used to select (i.e. blend) - * words from 2 packets. - ***************************************************************************/ -template -struct Selector { - bool select[N]; -}; - -template -EIGEN_DEVICE_FUNC inline Packet pblend(const Selector::size>& ifPacket, - const Packet& thenPacket, const Packet& elsePacket) { - return ifPacket.select[0] ? thenPacket : elsePacket; -} - /** \internal \returns 1 / a (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) { diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 54041bf8a..82caebb9b 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -127,7 +127,6 @@ struct packet_traits : default_packet_traits { HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasErfc = EIGEN_FAST_MATH, - HasBlend = 1 }; }; template <> @@ -158,7 +157,6 @@ struct packet_traits : default_packet_traits { HasCbrt = 1, HasATan = 1, HasATanh = 1, - HasBlend = 1 }; }; @@ -193,7 +191,6 @@ struct packet_traits : default_packet_traits { HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, - HasBlend = 0, HasBessel = 1, HasNdtri = 1 }; @@ -231,7 +228,6 @@ struct packet_traits : default_packet_traits { HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, - HasBlend = 0, HasBessel = 1, HasNdtri = 1 }; @@ -284,7 +280,6 @@ struct packet_traits : default_packet_traits { // HasMin = 0, // HasMax = 0, HasDiv = 0, - HasBlend = 0, HasTranspose = 0, HasNegate = 0, HasSqrt = 0, @@ -2070,31 +2065,6 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49); } -EIGEN_STRONG_INLINE __m256i avx_blend_mask(const Selector<4>& ifPacket) { - return _mm256_set_epi64x(0 - ifPacket.select[3], 0 - ifPacket.select[2], 0 - ifPacket.select[1], - 0 - ifPacket.select[0]); -} - -EIGEN_STRONG_INLINE __m256i avx_blend_mask(const Selector<8>& ifPacket) { - return _mm256_set_epi32(0 - ifPacket.select[7], 0 - ifPacket.select[6], 0 - ifPacket.select[5], - 0 - ifPacket.select[4], 0 - ifPacket.select[3], 0 - ifPacket.select[2], - 0 - ifPacket.select[1], 0 - ifPacket.select[0]); -} - -template <> -EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, - const Packet8f& elsePacket) { - const __m256 true_mask = _mm256_castsi256_ps(avx_blend_mask(ifPacket)); - return pselect(true_mask, thenPacket, elsePacket); -} - -template <> -EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, - const Packet4d& elsePacket) { - const __m256d true_mask = _mm256_castsi256_pd(avx_blend_mask(ifPacket)); - return pselect(true_mask, thenPacket, elsePacket); -} - // Packet math for Eigen::half #ifndef EIGEN_VECTORIZE_AVX512FP16 template <> diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 2aec9ac6b..b18105390 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -2058,27 +2058,6 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { PACK_OUTPUT_I32_2(kernel.packet, tmp.packet, 3, 1); } -template -EIGEN_STRONG_INLINE int avx512_blend_mask(const Selector& ifPacket) { - alignas(__m128i) uint8_t aux[sizeof(__m128i)]; - for (size_t i = 0; i < N; i++) aux[i] = static_cast(ifPacket.select[i]); - __m128i paux = _mm_sub_epi8(_mm_setzero_si128(), _mm_load_si128(reinterpret_cast(aux))); - return _mm_movemask_epi8(paux); -} - -template <> -EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& ifPacket, const Packet16f& thenPacket, - const Packet16f& elsePacket) { - __mmask16 m = avx512_blend_mask(ifPacket); - return _mm512_mask_blend_ps(m, elsePacket, thenPacket); -} -template <> -EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ifPacket, const Packet8d& thenPacket, - const Packet8d& elsePacket) { - __mmask8 m = avx512_blend_mask(ifPacket); - return _mm512_mask_blend_pd(m, elsePacket, thenPacket); -} - // Packet math for Eigen::half #ifndef EIGEN_VECTORIZE_AVX512FP16 template <> diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index d6df59af6..d49f13670 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -109,9 +109,6 @@ struct packet_traits > : default_packet_traits { HasSqrt = 1, HasLog = 1, HasExp = 1, -#ifdef EIGEN_VECTORIZE_VSX - HasBlend = 1, -#endif HasSetLinear = 0 }; }; @@ -364,17 +361,6 @@ EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(eq, vec_perm(eq, eq, p16uc_COMPLEX32_REV))); } -#ifdef EIGEN_VECTORIZE_VSX -template <> -EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, - const Packet2cf& elsePacket) { - Packet2cf result; - result.v = reinterpret_cast( - pblend(ifPacket, reinterpret_cast(thenPacket.v), reinterpret_cast(elsePacket.v))); - return result; -} -#endif - template <> EIGEN_STRONG_INLINE Packet2cf psqrt(const Packet2cf& a) { return psqrt_complex(a); diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 50f8a321e..c98f21767 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -206,7 +206,6 @@ struct packet_traits : default_packet_traits { HasErf = 0, #endif HasNegate = 1, - HasBlend = 1 }; }; template <> @@ -243,7 +242,6 @@ struct packet_traits : default_packet_traits { HasTanh = 0, HasErf = 0, HasNegate = 1, - HasBlend = 1 }; }; @@ -265,7 +263,6 @@ struct packet_traits : default_packet_traits { #else HasDiv = 0, #endif - HasBlend = 1, HasCmp = 1 }; }; @@ -283,7 +280,6 @@ struct packet_traits : default_packet_traits { HasSub = 1, HasMul = 1, HasDiv = 0, - HasBlend = 1, HasCmp = 1 }; }; @@ -301,7 +297,6 @@ struct packet_traits : default_packet_traits { HasSub = 1, HasMul = 1, HasDiv = 0, - HasBlend = 1, HasCmp = 1 }; }; @@ -319,7 +314,6 @@ struct packet_traits : default_packet_traits { HasSub = 1, HasMul = 1, HasDiv = 0, - HasBlend = 1, HasCmp = 1 }; }; @@ -337,7 +331,6 @@ struct packet_traits : default_packet_traits { HasSub = 1, HasMul = 1, HasDiv = 0, - HasBlend = 1, HasCmp = 1 }; }; @@ -3055,74 +3048,6 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[15] = vec_mergel(step3[7], step3[15]); } -template -EIGEN_STRONG_INLINE Packet pblend4(const Selector<4>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) { - Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]}; - Packet4ui mask = reinterpret_cast(pnegate(reinterpret_cast(select))); - return vec_sel(elsePacket, thenPacket, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, - const Packet4i& elsePacket) { - return pblend4(ifPacket, thenPacket, elsePacket); -} - -template <> -EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, - const Packet4f& elsePacket) { - return pblend4(ifPacket, thenPacket, elsePacket); -} - -template <> -EIGEN_STRONG_INLINE Packet8s pblend(const Selector<8>& ifPacket, const Packet8s& thenPacket, - const Packet8s& elsePacket) { - Packet8us select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3], - ifPacket.select[4], ifPacket.select[5], ifPacket.select[6], ifPacket.select[7]}; - Packet8us mask = reinterpret_cast(pnegate(reinterpret_cast(select))); - Packet8s result = vec_sel(elsePacket, thenPacket, mask); - return result; -} - -template <> -EIGEN_STRONG_INLINE Packet8us pblend(const Selector<8>& ifPacket, const Packet8us& thenPacket, - const Packet8us& elsePacket) { - Packet8us select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3], - ifPacket.select[4], ifPacket.select[5], ifPacket.select[6], ifPacket.select[7]}; - Packet8us mask = reinterpret_cast(pnegate(reinterpret_cast(select))); - return vec_sel(elsePacket, thenPacket, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet8bf pblend(const Selector<8>& ifPacket, const Packet8bf& thenPacket, - const Packet8bf& elsePacket) { - return pblend(ifPacket, thenPacket, elsePacket); -} - -template <> -EIGEN_STRONG_INLINE Packet16c pblend(const Selector<16>& ifPacket, const Packet16c& thenPacket, - const Packet16c& elsePacket) { - Packet16uc select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3], - ifPacket.select[4], ifPacket.select[5], ifPacket.select[6], ifPacket.select[7], - ifPacket.select[8], ifPacket.select[9], ifPacket.select[10], ifPacket.select[11], - ifPacket.select[12], ifPacket.select[13], ifPacket.select[14], ifPacket.select[15]}; - - Packet16uc mask = reinterpret_cast(pnegate(reinterpret_cast(select))); - return vec_sel(elsePacket, thenPacket, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet16uc pblend(const Selector<16>& ifPacket, const Packet16uc& thenPacket, - const Packet16uc& elsePacket) { - Packet16uc select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3], - ifPacket.select[4], ifPacket.select[5], ifPacket.select[6], ifPacket.select[7], - ifPacket.select[8], ifPacket.select[9], ifPacket.select[10], ifPacket.select[11], - ifPacket.select[12], ifPacket.select[13], ifPacket.select[14], ifPacket.select[15]}; - - Packet16uc mask = reinterpret_cast(pnegate(reinterpret_cast(select))); - return vec_sel(elsePacket, thenPacket, mask); -} - //---------- double ---------- #ifdef EIGEN_VECTORIZE_VSX typedef __vector double Packet2d; @@ -3191,7 +3116,6 @@ struct packet_traits : default_packet_traits { HasRsqrt = 0, #endif HasNegate = 1, - HasBlend = 1 }; }; @@ -3717,14 +3641,6 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[1] = t1; } -template <> -EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, - const Packet2d& elsePacket) { - Packet2l select = {ifPacket.select[0], ifPacket.select[1]}; - Packet2ul mask = reinterpret_cast(pnegate(reinterpret_cast(select))); - return vec_sel(elsePacket, thenPacket, mask); -} - #endif // __VSX__ } // end namespace internal diff --git a/Eigen/src/Core/arch/LSX/PacketMath.h b/Eigen/src/Core/arch/LSX/PacketMath.h index 2926036ae..77ffdd17f 100644 --- a/Eigen/src/Core/arch/LSX/PacketMath.h +++ b/Eigen/src/Core/arch/LSX/PacketMath.h @@ -171,7 +171,6 @@ struct packet_traits : default_packet_traits { HasSetLinear = 0, HasCmp = 1, - HasBlend = 0 }; }; @@ -187,7 +186,6 @@ struct packet_traits : default_packet_traits { HasSetLinear = 0, HasCmp = 1, HasDiv = 1, - HasBlend = 0 }; }; @@ -203,7 +201,6 @@ struct packet_traits : default_packet_traits { HasSetLinear = 0, HasCmp = 1, HasDiv = 1, - HasBlend = 0 }; }; @@ -219,7 +216,6 @@ struct packet_traits : default_packet_traits { HasSetLinear = 0, HasCmp = 1, HasDiv = 1, - HasBlend = 0 }; }; @@ -235,7 +231,6 @@ struct packet_traits : default_packet_traits { HasSetLinear = 0, HasNegate = 0, HasCmp = 1, - HasBlend = 0 }; }; @@ -252,7 +247,6 @@ struct packet_traits : default_packet_traits { HasNegate = 0, HasCmp = 1, HasDiv = 1, - HasBlend = 0 }; }; @@ -269,7 +263,6 @@ struct packet_traits : default_packet_traits { HasNegate = 0, HasCmp = 1, HasDiv = 1, - HasBlend = 0 }; }; @@ -286,7 +279,6 @@ struct packet_traits : default_packet_traits { HasNegate = 0, HasCmp = 1, HasDiv = 1, - HasBlend = 0 }; }; @@ -300,7 +292,6 @@ struct packet_traits : default_packet_traits { size = 4, HasSetLinear = 0, - HasBlend = 0, HasSign = 0, HasDiv = 1, HasExp = 1, @@ -320,7 +311,6 @@ struct packet_traits : default_packet_traits { size = 2, HasSetLinear = 0, - HasBlend = 0, HasSign = 0, HasDiv = 1, HasSqrt = 1, diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h index 2d2fbbca4..fbba642eb 100644 --- a/Eigen/src/Core/arch/MSA/Complex.h +++ b/Eigen/src/Core/arch/MSA/Complex.h @@ -105,7 +105,6 @@ struct packet_traits > : default_packet_traits { HasMin = 0, HasMax = 0, HasSetLinear = 0, - HasBlend = 1 }; }; @@ -314,12 +313,6 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[1].v = tmp; } -template <> -EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, - const Packet2cf& elsePacket) { - return (Packet2cf)(Packet4f)pblend(ifPacket, (Packet2d)thenPacket.v, (Packet2d)elsePacket.v); -} - //---------- double ---------- struct Packet1cd { diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h index 81da24f8d..2d5032a62 100644 --- a/Eigen/src/Core/arch/MSA/PacketMath.h +++ b/Eigen/src/Core/arch/MSA/PacketMath.h @@ -91,7 +91,6 @@ struct packet_traits : default_packet_traits { HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1 }; }; @@ -105,7 +104,6 @@ struct packet_traits : default_packet_traits { size = 4, // FIXME check the Has* HasDiv = 1, - HasBlend = 1 }; }; @@ -802,22 +800,6 @@ EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { return v; } -template <> -EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, - const Packet4f& elsePacket) { - Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]}; - Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0); - return (Packet4f)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket); -} - -template <> -EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, - const Packet4i& elsePacket) { - Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]}; - Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0); - return (Packet4i)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket); -} - //---------- double ---------- typedef v2f64 Packet2d; @@ -856,7 +838,6 @@ struct packet_traits : default_packet_traits { HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1 }; }; @@ -1222,14 +1203,6 @@ EIGEN_STRONG_INLINE Packet2d pround(const Packet2d& a) { return v; } -template <> -EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, - const Packet2d& elsePacket) { - Packet2ul select = {ifPacket.select[0], ifPacket.select[1]}; - Packet2l mask = __builtin_msa_ceqi_d((Packet2l)select, 0); - return (Packet2d)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket); -} - } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index ae81fedc8..b9d0866a0 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -194,7 +194,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0, HasDiv = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, @@ -240,7 +239,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0 }; }; @@ -266,7 +264,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0, HasSqrt = 1 }; @@ -294,7 +291,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0 }; }; @@ -320,7 +316,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0, HasSqrt = 1 }; }; @@ -347,7 +342,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0 }; }; @@ -373,7 +367,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0, HasSqrt = 1 }; @@ -401,7 +394,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0 }; }; @@ -427,7 +419,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0 }; }; @@ -4629,7 +4620,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0, HasDiv = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, @@ -5013,7 +5003,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0, HasDiv = 1, @@ -5388,7 +5377,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 1, - HasBlend = 0, HasInsert = 1, HasReduxp = 1, HasDiv = 1, diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index f79da7b8c..9dfe3343c 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -49,7 +49,6 @@ struct packet_traits > : default_packet_traits { HasMin = 0, HasMax = 0, HasSetLinear = 0, - HasBlend = 1 }; }; #endif @@ -413,13 +412,6 @@ EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(eq, vec2d_swizzle1(eq, 1, 0))); } -template <> -EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, - const Packet2cf& elsePacket) { - __m128d result = pblend(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v)); - return Packet2cf(_mm_castpd_ps(result)); -} - template <> EIGEN_STRONG_INLINE Packet1cd psqrt(const Packet1cd& a) { return psqrt_complex(a); diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 8a2cfb2cc..b8b1d9a5e 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -1999,44 +1999,6 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { kernel.packet[15] = _mm_unpackhi_epi64(u7, uf); } -EIGEN_STRONG_INLINE __m128i sse_blend_mask(const Selector<2>& ifPacket) { - return _mm_set_epi64x(0 - ifPacket.select[1], 0 - ifPacket.select[0]); -} - -EIGEN_STRONG_INLINE __m128i sse_blend_mask(const Selector<4>& ifPacket) { - return _mm_set_epi32(0 - ifPacket.select[3], 0 - ifPacket.select[2], 0 - ifPacket.select[1], 0 - ifPacket.select[0]); -} - -template <> -EIGEN_STRONG_INLINE Packet2l pblend(const Selector<2>& ifPacket, const Packet2l& thenPacket, - const Packet2l& elsePacket) { - const __m128i true_mask = sse_blend_mask(ifPacket); - return pselect(true_mask, thenPacket, elsePacket); -} -template <> -EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, - const Packet4i& elsePacket) { - const __m128i true_mask = sse_blend_mask(ifPacket); - return pselect(true_mask, thenPacket, elsePacket); -} -template <> -EIGEN_STRONG_INLINE Packet4ui pblend(const Selector<4>& ifPacket, const Packet4ui& thenPacket, - const Packet4ui& elsePacket) { - return (Packet4ui)pblend(ifPacket, (Packet4i)thenPacket, (Packet4i)elsePacket); -} -template <> -EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, - const Packet4f& elsePacket) { - const __m128i true_mask = sse_blend_mask(ifPacket); - return pselect(_mm_castsi128_ps(true_mask), thenPacket, elsePacket); -} -template <> -EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, - const Packet2d& elsePacket) { - const __m128i true_mask = sse_blend_mask(ifPacket); - return pselect(_mm_castsi128_pd(true_mask), thenPacket, elsePacket); -} - // Scalar path for pmadd with FMA to ensure consistency with vectorized path. #if defined(EIGEN_VECTORIZE_FMA) template <> diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h index 0370ab14b..28fc62b83 100644 --- a/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/Eigen/src/Core/arch/SVE/PacketMath.h @@ -53,7 +53,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 0, - HasBlend = 0, HasReduxp = 0 // Not implemented in SVE }; }; @@ -347,7 +346,6 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 0, - HasBlend = 0, HasReduxp = 0, // Not implemented in SVE HasDiv = 1, diff --git a/Eigen/src/Core/arch/SYCL/PacketMath.h b/Eigen/src/Core/arch/SYCL/PacketMath.h index 6b6bfe43b..e5dad3c3b 100644 --- a/Eigen/src/Core/arch/SYCL/PacketMath.h +++ b/Eigen/src/Core/arch/SYCL/PacketMath.h @@ -542,31 +542,6 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pblend( - const Selector::size>& ifPacket, const cl::sycl::cl_half8& thenPacket, - const cl::sycl::cl_half8& elsePacket) { - cl::sycl::cl_short8 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1, - ifPacket.select[3] ? 0 : -1, ifPacket.select[4] ? 0 : -1, ifPacket.select[5] ? 0 : -1, - ifPacket.select[6] ? 0 : -1, ifPacket.select[7] ? 0 : -1); - return cl::sycl::select(thenPacket, elsePacket, condition); -} - -template <> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend( - const Selector::size>& ifPacket, const cl::sycl::cl_float4& thenPacket, - const cl::sycl::cl_float4& elsePacket) { - cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1, - ifPacket.select[3] ? 0 : -1); - return cl::sycl::select(thenPacket, elsePacket, condition); -} - -template <> -inline cl::sycl::cl_double2 pblend(const Selector::size>& ifPacket, - const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) { - cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1); - return cl::sycl::select(thenPacket, elsePacket, condition); -} #endif // SYCL_DEVICE_ONLY } // end namespace internal diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h index 692f90f3e..dfcdcab1f 100644 --- a/Eigen/src/Core/arch/ZVector/Complex.h +++ b/Eigen/src/Core/arch/ZVector/Complex.h @@ -72,7 +72,6 @@ struct packet_traits > : default_packet_traits { HasAbs2 = 0, HasMin = 0, HasMax = 0, - HasBlend = 1, HasSetLinear = 0 }; }; @@ -469,14 +468,6 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { kernel.packet[1].cd[0] = tmp; } -template <> -EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, - const Packet2cf& elsePacket) { - Packet2cf result; - const Selector<4> ifPacket4 = {ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1]}; - result.v = pblend(ifPacket4, thenPacket.v, elsePacket.v); - return result; -} #else template <> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) { @@ -553,14 +544,6 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { kernel.packet[0].v = tmp; } -template <> -EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, - const Packet2cf& elsePacket) { - Packet2cf result; - result.v = reinterpret_cast( - pblend(ifPacket, reinterpret_cast(thenPacket.v), reinterpret_cast(elsePacket.v))); - return result; -} #endif } // end namespace internal diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h index 39073ed81..4ccda873d 100644 --- a/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -167,7 +167,6 @@ struct packet_traits : default_packet_traits { HasSub = 1, HasMul = 1, HasDiv = 1, - HasBlend = 1 }; }; @@ -197,7 +196,6 @@ struct packet_traits : default_packet_traits { HasTanh = 1, HasErf = 1, HasNegate = 1, - HasBlend = 1 }; }; @@ -224,7 +222,6 @@ struct packet_traits : default_packet_traits { HasSqrt = 1, HasRsqrt = 1, HasNegate = 1, - HasBlend = 1 }; }; @@ -594,41 +591,32 @@ EIGEN_STRONG_INLINE void prefetch(const double* addr) { template EIGEN_STRONG_INLINE Packet2l parithmetic_shift_right(const Packet2l& a) { - return Packet2l { parithmetic_shift_right(a[0]), parithmetic_shift_right(a[1]) }; + return Packet2l{parithmetic_shift_right(a[0]), parithmetic_shift_right(a[1])}; } template EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) { - return Packet4i { - parithmetic_shift_right(a[0]), - parithmetic_shift_right(a[1]), - parithmetic_shift_right(a[2]), - parithmetic_shift_right(a[3]) }; + return Packet4i{parithmetic_shift_right(a[0]), parithmetic_shift_right(a[1]), parithmetic_shift_right(a[2]), + parithmetic_shift_right(a[3])}; } template EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) { - return Packet2l { plogical_shift_right(a[0]), plogical_shift_right(a[1]) }; + return Packet2l{plogical_shift_right(a[0]), plogical_shift_right(a[1])}; } template EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i& a) { - return Packet4i { - plogical_shift_right(a[0]), - plogical_shift_right(a[1]), - plogical_shift_right(a[2]), - plogical_shift_right(a[3]) }; + return Packet4i{plogical_shift_right(a[0]), plogical_shift_right(a[1]), plogical_shift_right(a[2]), + plogical_shift_right(a[3])}; } template EIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) { - return Packet2l { plogical_shift_left(a[0]), plogical_shift_left(a[1]) }; + return Packet2l{plogical_shift_left(a[0]), plogical_shift_left(a[1])}; } template EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i& a) { - return Packet4i { - plogical_shift_left(a[0]), - plogical_shift_left(a[1]), - plogical_shift_left(a[2]), - plogical_shift_left(a[3]) }; + return Packet4i{plogical_shift_left(a[0]), plogical_shift_left(a[1]), plogical_shift_left(a[2]), + plogical_shift_left(a[3])}; } template <> @@ -747,22 +735,6 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[1] = t1; } -template <> -EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, - const Packet4i& elsePacket) { - Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]}; - Packet4ui mask = vec_cmpeq(select, reinterpret_cast(p4i_ONE)); - return vec_sel(elsePacket, thenPacket, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, - const Packet2d& elsePacket) { - Packet2ul select = {ifPacket.select[0], ifPacket.select[1]}; - Packet2ul mask = vec_cmpeq(select, reinterpret_cast(p2l_ONE)); - return vec_sel(elsePacket, thenPacket, mask); -} - /* z13 has no vector float support so we emulate that with double z14 has proper vector float support. */ @@ -1068,19 +1040,6 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[3].v4f[1] = t3.packet[1]; } -template <> -EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, - const Packet4f& elsePacket) { - Packet2ul select_hi = {ifPacket.select[0], ifPacket.select[1]}; - Packet2ul select_lo = {ifPacket.select[2], ifPacket.select[3]}; - Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast(p2l_ONE)); - Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast(p2l_ONE)); - Packet4f result; - result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi); - result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo); - return result; -} - template <> Packet4f EIGEN_STRONG_INLINE pcmp_le(const Packet4f& a, const Packet4f& b) { Packet4f res; @@ -1288,14 +1247,6 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[3] = vec_mergel(t1, t3); } -template <> -EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, - const Packet4f& elsePacket) { - Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]}; - Packet4ui mask = vec_cmpeq(select, reinterpret_cast(p4i_ONE)); - return vec_sel(elsePacket, thenPacket, mask); -} - #endif template <> @@ -1338,62 +1289,51 @@ EIGEN_STRONG_INLINE Packet4f plset(const float& a) { } #if !defined(vec_float) || !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 13) -#pragma GCC warning \ - "float->int and int->float conversion is simulated. compile for z15 for improved performance" +#pragma GCC warning "float->int and int->float conversion is simulated. compile for z15 for improved performance" template <> struct cast_impl { EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) { - return Packet4f{float(a[0]), float(a[1]), float(a[2]), float(a[3]) }; + return Packet4f{float(a[0]), float(a[1]), float(a[2]), float(a[3])}; } }; template <> struct cast_impl { EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) { - return Packet4i{int(a[0]), int(a[1]), int(a[2]), int(a[3]) }; + return Packet4i{int(a[0]), int(a[1]), int(a[2]), int(a[3])}; } }; template <> struct cast_impl { - EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) { - return Packet2d{double(a[0]), double(a[1]) }; - } + EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) { return Packet2d{double(a[0]), double(a[1])}; } }; template <> struct cast_impl { EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) { - return Packet2l{(long long)(a[0]), (long long)(a[1]) }; + return Packet2l{(long long)(a[0]), (long long)(a[1])}; } }; #else template <> struct cast_impl { - EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) { - return vec_float(a); - } + EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) { return vec_float(a); } }; template <> struct cast_impl { - EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) { - return vec_signed(a); - } + EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) { return vec_signed(a); } }; template <> struct cast_impl { - EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) { - return vec_double(a); - } + EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) { return vec_double(a); } }; template <> struct cast_impl { - EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) { - return vec_signed(a); - } + EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) { return vec_signed(a); } }; #endif diff --git a/test/packetmath.cpp b/test/packetmath.cpp index f21c72621..18574d25a 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -747,22 +747,6 @@ void packetmath() { } } - if (PacketTraits::HasBlend) { - Packet thenPacket = internal::pload(data1); - Packet elsePacket = internal::pload(data2); - EIGEN_ALIGN_MAX internal::Selector selector; - for (int i = 0; i < PacketSize; ++i) { - selector.select[i] = i; - } - - Packet blend = internal::pblend(selector, thenPacket, elsePacket); - EIGEN_ALIGN_MAX Scalar result[size]; - internal::pstore(result, blend); - for (int i = 0; i < PacketSize; ++i) { - VERIFY(test::isApproxAbs(result[i], (selector.select[i] ? data1[i] : data2[i]), refvalue)); - } - } - { for (int i = 0; i < PacketSize; ++i) { // "if" mask diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 5544953e2..f6bdc50e0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -691,9 +691,9 @@ struct TensorEvaluator static constexpr int Layout = TensorEvaluator::Layout; enum { IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, - PacketAccess = (TensorEvaluator::PacketAccess && - TensorEvaluator::PacketAccess && PacketType::HasBlend) || - TernaryPacketAccess, + PacketAccess = + (TensorEvaluator::PacketAccess && TensorEvaluator::PacketAccess) || + TernaryPacketAccess, BlockAccess = TensorEvaluator::BlockAccess && TensorEvaluator::BlockAccess && TensorEvaluator::BlockAccess, @@ -789,13 +789,14 @@ struct TensorEvaluator template = true> EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - internal::Selector select; + Scalar arr[PacketSize]; EIGEN_UNROLL_LOOP for (Index i = 0; i < PacketSize; ++i) { - select.select[i] = m_condImpl.coeff(index + i); + arr[i] = m_condImpl.coeff(index + i) ? Scalar(-1) : Scalar(0); } - return internal::pblend(select, m_thenImpl.template packet(index), - m_elseImpl.template packet(index)); + return TernarySelectOp().template packetOp(m_thenImpl.template packet(index), + m_elseImpl.template packet(index), + internal::pload(arr)); } template = true>