From 9164d3f16ad21c1546d5fc99cd28fffc8ac3c1ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20S=C3=A1nchez?= Date: Thu, 18 Dec 2025 21:08:52 +0000 Subject: [PATCH] Fix undefined behavior in packetmath. libeigen/eigen!2098 Closes #3009 --- Eigen/src/Core/arch/AVX/Complex.h | 7 ++++--- Eigen/src/Core/arch/AVX/PacketMath.h | 6 ++++-- Eigen/src/Core/arch/AVX512/Complex.h | 9 +++++---- Eigen/src/Core/arch/AVX512/PacketMath.h | 18 +++++++++--------- Eigen/src/Core/arch/SSE/Complex.h | 2 +- Eigen/src/Core/arch/SSE/PacketMath.h | 12 +++++++----- 6 files changed, 30 insertions(+), 24 deletions(-) diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index a4a87c4fc..bf19df79a 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -79,8 +79,8 @@ EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a) { } template <> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a) { - const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000, - 0x80000000, 0x00000000, 0x80000000)); + const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32, + 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32)); return Packet4cf(_mm256_xor_ps(a.v, mask)); } @@ -282,7 +282,8 @@ EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { } template <> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a) { - const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0)); + const __m256d mask = + _mm256_castsi256_pd(_mm256_set_epi32(SIGN_MASK_I32, 0x0, 0x0, 0x0, SIGN_MASK_I32, 0x0, 0x0, 0x0)); return Packet2cd(_mm256_xor_pd(a.v, mask)); } diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index eafff3d8c..f814d30b4 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -46,6 +46,8 @@ typedef 
eigen_packet_wrapper<__m256i, 3> Packet4l; typedef eigen_packet_wrapper<__m256i, 5> Packet4ul; #endif +#define SIGN_MASK_I64 static_cast<int64_t>(0x8000000000000000ULL) + template <> struct is_arithmetic<__m256> { enum { value = true }; @@ -875,12 +877,12 @@ EIGEN_STRONG_INLINE Packet8ui psub<Packet8ui>(const Packet8ui& a, const Packet8u template <> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a) { - const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)); + const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(SIGN_MASK_I32)); return _mm256_xor_ps(a, mask); } template <> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a) { - const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x8000000000000000ULL)); + const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(SIGN_MASK_I64)); return _mm256_xor_pd(a, mask); } template <> diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index b9b49534d..ba15f41db 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -82,8 +82,8 @@ EIGEN_STRONG_INLINE Packet8cf pnegate(const Packet8cf& a) { template <> EIGEN_STRONG_INLINE Packet8cf pconj(const Packet8cf& a) { const __m512 mask = _mm512_castsi512_ps(_mm512_setr_epi32( - 0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000, - 0x80000000, 0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000, 0x80000000)); + 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32, + 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32)); return Packet8cf(pxor(a.v, mask)); } @@ -262,8 +262,9 @@ EIGEN_STRONG_INLINE Packet4cd pnegate(const Packet4cd& a) { } template <> EIGEN_STRONG_INLINE Packet4cd pconj(const Packet4cd& a) { - const __m512d mask = _mm512_castsi512_pd(_mm512_set_epi32(0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0, 
- 0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0)); + const __m512d mask = + _mm512_castsi512_pd(_mm512_set_epi32(SIGN_MASK_I32, 0x0, 0x0, 0x0, SIGN_MASK_I32, 0x0, 0x0, 0x0, SIGN_MASK_I32, + 0x0, 0x0, 0x0, SIGN_MASK_I32, 0x0, 0x0, 0x0)); return Packet4cd(pxor(a.v, mask)); } diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index c69ba159d..75a83e1ea 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -443,15 +443,15 @@ EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) { // The intel docs give it a relatively high latency as well, so we're probably // better off with using _mm512_set_epi32 directly anyways. const __m512i mask = - _mm512_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, - 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); + _mm512_set_epi32(SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, + SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, + SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32); return _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(a), mask)); } template <> EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) { - const __m512i mask = - _mm512_set_epi64(0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, - 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL); + const __m512i mask = _mm512_set_epi64(SIGN_MASK_I64, SIGN_MASK_I64, SIGN_MASK_I64, SIGN_MASK_I64, SIGN_MASK_I64, + SIGN_MASK_I64, SIGN_MASK_I64, SIGN_MASK_I64); return _mm512_castsi512_pd(_mm512_xor_epi64(_mm512_castpd_si512(a), mask)); } template <> @@ -770,22 +770,22 @@ EIGEN_STRONG_INLINE Packet8l pcmp_lt(const Packet8l& a, const Packet8l& b) { template <> EIGEN_STRONG_INLINE Packet8d 
pcmp_eq(const Packet8d& a, const Packet8d& b) { __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ); - return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu)); + return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, int64_t(-1))); } template <> EIGEN_STRONG_INLINE Packet8d pcmp_le(const Packet8d& a, const Packet8d& b) { __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_LE_OQ); - return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu)); + return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, int64_t(-1))); } template <> EIGEN_STRONG_INLINE Packet8d pcmp_lt(const Packet8d& a, const Packet8d& b) { __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ); - return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu)); + return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, int64_t(-1))); } template <> EIGEN_STRONG_INLINE Packet8d pcmp_lt_or_nan(const Packet8d& a, const Packet8d& b) { __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_NGE_UQ); - return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu)); + return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, int64_t(-1))); } template <> diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 9dfe3343c..4002c1612 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -277,7 +277,7 @@ EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { } template <> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { - const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000, 0x0, 0x0, 0x0)); + const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(static_cast<int32_t>(0x80000000), 0x0, 0x0, 0x0)); return Packet1cd(_mm_xor_pd(a.v, mask)); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h 
b/Eigen/src/Core/arch/SSE/PacketMath.h index 7d53fa204..bd6f5dfff 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -91,6 +91,8 @@ struct shuffle_mask { enum { mask = (s) << 6 | (r) << 4 | (q) << 2 | (p) }; }; +#define SIGN_MASK_I32 static_cast<int32_t>(0x80000000) + // TODO: change the implementation of all swizzle* ops from macro to template, #define vec4f_swizzle1(v, p, q, r, s) \ Packet4f(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v), (shuffle_mask<p, q, r, s>::mask)))) @@ -560,7 +562,7 @@ EIGEN_STRONG_INLINE Packet4f paddsub<Packet4f>(const Packet4f& a, const Packet4f #ifdef EIGEN_VECTORIZE_SSE3 return _mm_addsub_ps(a, b); #else - const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x0, 0x80000000, 0x0)); + const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(SIGN_MASK_I32, 0x0, SIGN_MASK_I32, 0x0)); return padd(a, pxor(mask, b)); #endif } @@ -572,19 +574,19 @@ EIGEN_STRONG_INLINE Packet2d paddsub<Packet2d>(const Packet2d& a, const Packet2d #ifdef EIGEN_VECTORIZE_SSE3 return _mm_addsub_pd(a, b); #else - const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0, 0x80000000, 0x0, 0x0)); + const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0, SIGN_MASK_I32, 0x0, 0x0)); return padd(a, pxor(mask, b)); #endif } template <> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { - const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000)); + const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32)); return _mm_xor_ps(a, mask); } template <> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { - const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0, 0x80000000, 0x0, 0x80000000)); + const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0, SIGN_MASK_I32, 0x0, SIGN_MASK_I32)); return _mm_xor_pd(a, mask); } template <> @@ -1249,7 +1251,7 @@ EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { } template <> 
EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { - const __m128i mask = _mm_setr_epi32(0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF); + const __m128i mask = _mm_setr_epi32(-1, 0x7FFFFFFF, -1, 0x7FFFFFFF); return _mm_castsi128_pd(_mm_and_si128(mask, _mm_castpd_si128(a))); } template <>