mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
update AVX and AVX512 to support gcc < 10.1 and clang < 10
libeigen/eigen!2129 Closes #3021
This commit is contained in:
committed by
Antonio Sánchez
parent
552ca8f15f
commit
43a01f06ad
@@ -56,6 +56,40 @@ struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Assembles a 256-bit float vector from two 128-bit halves: `lo` becomes the
// lower 128 bits and `hi` the upper 128 bits (same argument order as
// _mm256_set_m128).
//
// The guarded branch avoids calling _mm256_set_m128 and builds the same value
// from a cast plus a 128-bit lane insert.
// NOTE(review): the EIGEN_COMP_* macros are defined outside this view. If
// EIGEN_COMP_CLANG is 0 on non-clang compilers, the guard is true for every
// GCC version, not only old ones -- confirm that this is intended.
EIGEN_STRONG_INLINE __m256 _eigen_mm256_set_m128(__m128 hi, __m128 lo) {
#if EIGEN_COMP_GNUC && (EIGEN_COMP_CLANG < 1000 || EIGEN_COMP_GNUC < 810)
  // Widen `lo` into the low lane, then write `hi` into lane 1 (the upper half).
  return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1);
#else
  return _mm256_set_m128(hi, lo);
#endif
}
|
||||
|
||||
// Assembles a 256-bit double vector from two 128-bit halves: `lo` becomes the
// lower 128 bits and `hi` the upper 128 bits (same argument order as
// _mm256_set_m128d).
//
// The guarded branch avoids calling _mm256_set_m128d and builds the same value
// from a cast plus a 128-bit lane insert.
// NOTE(review): the EIGEN_COMP_* macros are defined outside this view. If
// EIGEN_COMP_CLANG is 0 on non-clang compilers, the guard is true for every
// GCC version, not only old ones -- confirm that this is intended.
EIGEN_STRONG_INLINE __m256d _eigen_mm256_set_m128d(__m128d hi, __m128d lo) {
#if EIGEN_COMP_GNUC && (EIGEN_COMP_CLANG < 1000 || EIGEN_COMP_GNUC < 810)
  // Widen `lo` into the low lane, then write `hi` into lane 1 (the upper half).
  return _mm256_insertf128_pd(_mm256_castpd128_pd256(lo), hi, 1);
#else
  return _mm256_set_m128d(hi, lo);
#endif
}
|
||||
|
||||
// Assembles a 256-bit integer vector from two 128-bit halves: `lo` becomes the
// lower 128 bits and `hi` the upper 128 bits (same argument order as
// _mm256_set_m128i).
//
// The guarded branch avoids calling _mm256_set_m128i and builds the same value
// from a cast plus a 128-bit lane insert, entirely in registers.
// NOTE(review): the EIGEN_COMP_* macros are defined outside this view. If
// EIGEN_COMP_CLANG is 0 on non-clang compilers, the guard is true for every
// GCC version, not only old ones -- confirm that this is intended.
EIGEN_STRONG_INLINE __m256i _eigen_mm256_set_m128i(__m128i hi, __m128i lo) {
#if EIGEN_COMP_GNUC && (EIGEN_COMP_CLANG < 1000 || EIGEN_COMP_GNUC < 810)
#if defined(EIGEN_VECTORIZE_AVX2)
  // AVX2: integer-domain lane insert.
  return _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1);
#else
  // Plain AVX already offers a 128-bit lane insert for integer vectors
  // (_mm256_insertf128_si256), so no store/reload through a stack buffer is
  // needed to combine the two halves.
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
#endif
#else
  return _mm256_set_m128i(hi, lo);
#endif
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16b pcast<Packet8f, Packet16b>(const Packet8f& a, const Packet8f& b) {
|
||||
__m256 nonzero_a = _mm256_cmp_ps(a, pzero(a), _CMP_NEQ_UQ);
|
||||
@@ -109,7 +143,7 @@ EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8i pcast<Packet4d, Packet8i>(const Packet4d& a, const Packet4d& b) {
|
||||
return _mm256_set_m128i(_mm256_cvttpd_epi32(b), _mm256_cvttpd_epi32(a));
|
||||
return _eigen_mm256_set_m128i(_mm256_cvttpd_epi32(b), _mm256_cvttpd_epi32(a));
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -124,7 +158,7 @@ EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8f pcast<Packet4d, Packet8f>(const Packet4d& a, const Packet4d& b) {
|
||||
return _mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a));
|
||||
return _eigen_mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a));
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -249,7 +283,7 @@ EIGEN_STRONG_INLINE Packet4d pcast<Packet4l, Packet4d>(const Packet4l& a) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4d pcast<Packet2l, Packet4d>(const Packet2l& a, const Packet2l& b) {
|
||||
return _mm256_set_m128d((pcast<Packet2l, Packet2d>(b)), (pcast<Packet2l, Packet2d>(a)));
|
||||
return _eigen_mm256_set_m128d((pcast<Packet2l, Packet2d>(b)), (pcast<Packet2l, Packet2d>(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -44,6 +44,54 @@ typedef eigen_packet_wrapper<__m512i, 6> Packet32s;
|
||||
// Wrapper-typed packets over raw __m256i / __m128i registers. Elsewhere in
// this file they are stored through numext::int16_t pointers (pstoreu).
typedef eigen_packet_wrapper<__m256i, 6> Packet16s;
typedef eigen_packet_wrapper<__m128i, 6> Packet8s;
|
||||
|
||||
// Unaligned 512-bit load from `from`, returned as a Packet16i.
//
// The guarded branch avoids _mm512_loadu_epi32 and uses the untyped
// _mm512_loadu_si512 load instead.
// NOTE(review): the EIGEN_COMP_* macros are defined outside this view. If
// EIGEN_COMP_CLANG is 0 on non-clang compilers, the guard is true for every
// GCC version, not only old ones -- confirm that this is intended.
EIGEN_STRONG_INLINE Packet16i _eigen_mm512_loadu_epi32(const int* from) {
#if EIGEN_COMP_GNUC && (EIGEN_COMP_CLANG < 1000 || EIGEN_COMP_GNUC < 1010)
  return _mm512_loadu_si512(static_cast<const void*>(from));
#else
  return _mm512_loadu_epi32(from);
#endif
}
|
||||
|
||||
// Unaligned 512-bit load from `from`.
//
// The guarded branch avoids _mm512_loadu_epi64 and uses the untyped
// _mm512_loadu_si512 load instead.
// NOTE(review): the return type is Packet16i although the visible caller
// (ploadu<Packet8l>) returns a Packet8l; this relies on a conversion between
// those types that is defined outside this view.
// NOTE(review): the EIGEN_COMP_* macros are defined outside this view. If
// EIGEN_COMP_CLANG is 0 on non-clang compilers, the guard is true for every
// GCC version, not only old ones -- confirm that this is intended.
EIGEN_STRONG_INLINE Packet16i _eigen_mm512_loadu_epi64(const int64_t* from) {
#if EIGEN_COMP_GNUC && (EIGEN_COMP_CLANG < 1000 || EIGEN_COMP_GNUC < 1010)
  return _mm512_loadu_si512(static_cast<const void*>(from));
#else
  return _mm512_loadu_epi64(from);
#endif
}
|
||||
|
||||
// Unaligned 512-bit store of `from` to the memory at `to`.
//
// The guarded branch avoids _mm512_storeu_epi32 and uses the untyped
// _mm512_storeu_si512 store instead.
// NOTE(review): the EIGEN_COMP_* macros are defined outside this view. If
// EIGEN_COMP_CLANG is 0 on non-clang compilers, the guard is true for every
// GCC version, not only old ones -- confirm that this is intended.
EIGEN_STRONG_INLINE void _eigen_mm512_storeu_epi32(void* to, const Packet16i& from) {
#if EIGEN_COMP_GNUC && (EIGEN_COMP_CLANG < 1000 || EIGEN_COMP_GNUC < 1010)
  _mm512_storeu_si512(to, from);
#else
  _mm512_storeu_epi32(to, from);
#endif
}
|
||||
|
||||
// Unaligned 512-bit store of `from` to the memory at `to`.
//
// The guarded branch avoids _mm512_storeu_epi64 and uses the untyped
// _mm512_storeu_si512 store instead.
// NOTE(review): `from` is typed Packet16i although the visible caller
// (pstoreu<int64_t>) passes a Packet8l; this relies on a conversion between
// those types that is defined outside this view.
// NOTE(review): the EIGEN_COMP_* macros are defined outside this view. If
// EIGEN_COMP_CLANG is 0 on non-clang compilers, the guard is true for every
// GCC version, not only old ones -- confirm that this is intended.
EIGEN_STRONG_INLINE void _eigen_mm512_storeu_epi64(void* to, const Packet16i& from) {
#if EIGEN_COMP_GNUC && (EIGEN_COMP_CLANG < 1000 || EIGEN_COMP_GNUC < 1010)
  _mm512_storeu_si512(to, from);
#else
  _mm512_storeu_epi64(to, from);
#endif
}
|
||||
|
||||
// Unaligned 256-bit store of `from` to the memory at `to`.
//
// The guarded branch avoids _mm256_storeu_epi32 and uses
// _mm256_storeu_si256 instead.
// NOTE(review): the EIGEN_COMP_* macros are defined outside this view. If
// EIGEN_COMP_CLANG is 0 on non-clang compilers, the guard is true for every
// GCC version, not only old ones -- confirm that this is intended.
EIGEN_STRONG_INLINE void _eigen_mm256_storeu_epi32(void* to, const __m256i& from) {
#if EIGEN_COMP_GNUC && (EIGEN_COMP_CLANG < 1000 || EIGEN_COMP_GNUC < 1010)
  // _mm256_storeu_si256 takes a typed pointer, so convert from void*.
  _mm256_storeu_si256(static_cast<__m256i*>(to), from);
#else
  _mm256_storeu_epi32(to, from);
#endif
}
|
||||
|
||||
// Unaligned 128-bit store of `from` to the memory at `to`.
//
// The guarded branch avoids _mm_storeu_epi32 and uses _mm_storeu_si128
// instead.
// NOTE(review): the EIGEN_COMP_* macros are defined outside this view. If
// EIGEN_COMP_CLANG is 0 on non-clang compilers, the guard is true for every
// GCC version, not only old ones -- confirm that this is intended.
EIGEN_STRONG_INLINE void _eigen_mm_storeu_epi32(void* to, const __m128i& from) {
#if EIGEN_COMP_GNUC && (EIGEN_COMP_CLANG < 1000 || EIGEN_COMP_GNUC < 1010)
  // _mm_storeu_si128 takes a typed pointer, so convert from void*.
  _mm_storeu_si128(static_cast<__m128i*>(to), from);
#else
  _mm_storeu_epi32(to, from);
#endif
}
|
||||
|
||||
template <>
|
||||
struct is_arithmetic<__m512> {
|
||||
enum { value = true };
|
||||
@@ -1033,11 +1081,11 @@ EIGEN_STRONG_INLINE Packet8d ploadu<Packet8d>(const double* from) {
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_epi32(from);
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return _eigen_mm512_loadu_epi32(from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8l ploadu<Packet8l>(const int64_t* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_epi64(from);
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return _eigen_mm512_loadu_epi64(from);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -1158,11 +1206,11 @@ EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet8d& from) {
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet16i& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_epi32(to, from);
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _eigen_mm512_storeu_epi32(to, from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<int64_t>(int64_t* to, const Packet8l& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_epi64(to, from);
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _eigen_mm512_storeu_epi64(to, from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet16f& from, uint16_t umask) {
|
||||
@@ -2997,19 +3045,19 @@ EIGEN_STRONG_INLINE void pstore<numext::int16_t, Packet8s>(numext::int16_t* out,
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<numext::int16_t, Packet32s>(numext::int16_t* out, const Packet32s& x) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE
|
||||
_mm512_storeu_epi32(out, x);
|
||||
_eigen_mm512_storeu_epi32(out, x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<numext::int16_t, Packet16s>(numext::int16_t* out, const Packet16s& x) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE
|
||||
_mm256_storeu_epi32(out, x);
|
||||
_eigen_mm256_storeu_epi32(out, x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<numext::int16_t, Packet8s>(numext::int16_t* out, const Packet8s& x) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE
|
||||
_mm_storeu_epi32(out, x);
|
||||
_eigen_mm_storeu_epi32(out, x);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -52,9 +52,17 @@ struct type_casting_traits<bfloat16, float> : vectorized_type_casting_traits<bfl
|
||||
template <>
|
||||
struct type_casting_traits<float, bfloat16> : vectorized_type_casting_traits<float, bfloat16> {};
|
||||
|
||||
// Lane-wise "not equal" comparison of two 16-float vectors, returned as a
// 16-bit mask.
//
// The guarded branch avoids _mm512_cmpneq_ps_mask and spells the comparison
// out with the generic _mm512_cmp_ps_mask and the _CMP_NEQ_UQ predicate.
// NOTE(review): the EIGEN_COMP_* macros are defined outside this view. If
// EIGEN_COMP_CLANG is 0 on non-clang compilers, the guard is true for every
// GCC version, not only old ones -- confirm that this is intended.
EIGEN_STRONG_INLINE __mmask16 _eigen_mm512_cmpneq_ps_mask(__m512 a, __m512 b) {
#if EIGEN_COMP_GNUC && (EIGEN_COMP_CLANG < 1000 || EIGEN_COMP_GNUC < 810)
  return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ);
#else
  return _mm512_cmpneq_ps_mask(a, b);
#endif
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16b pcast<Packet16f, Packet16b>(const Packet16f& a) {
|
||||
__mmask16 mask = _mm512_cmpneq_ps_mask(a, pzero(a));
|
||||
__mmask16 mask = _eigen_mm512_cmpneq_ps_mask(a, pzero(a));
|
||||
return _mm512_maskz_cvtepi32_epi8(mask, _mm512_set1_epi32(1));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user