more avx predux_any

This commit is contained in:
Charles Schlosser
2024-11-07 19:58:48 +00:00
committed by Rasmus Munk Larsen
parent bc424f617a
commit 8adf43640e
2 changed files with 23 additions and 7 deletions

View File

@@ -2025,6 +2025,15 @@ EIGEN_STRONG_INLINE bool predux_any(const Packet8ui& x) {
return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0;
}
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8h& x) {
  // Gather the most significant bit of every byte of the packet into an
  // integer bitmask; the packet reports "any" when at least one is set.
  // NOTE(review): the mask is per byte, so bit 7 of the low byte of each
  // 16-bit element also contributes - presumably callers only pass
  // all-ones / all-zeros comparison masks; confirm.
  const int byte_top_bits = _mm_movemask_epi8(x);
  return byte_top_bits != 0;
}
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8bf& x) {
  // Gather the most significant bit of every byte of the packet into an
  // integer bitmask; the packet reports "any" when at least one is set.
  // NOTE(review): the mask is per byte, so bit 7 of the low byte of each
  // 16-bit element also contributes - presumably callers only pass
  // all-ones / all-zeros comparison masks; confirm.
  const int byte_top_bits = _mm_movemask_epi8(x);
  return byte_top_bits != 0;
}
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8f, 8>& kernel) {
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);

View File

@@ -1640,16 +1640,23 @@ EIGEN_STRONG_INLINE int64_t predux_max<Packet8l>(const Packet8l& a) {
}
// NOTE(review): this is a diff rendering whose +/- markers were lost, so two
// versions of predux_any(const Packet16f&) appear back to back. The first
// signature and its three body lines look like the implementation removed by
// the commit: reinterpret the floats as 32-bit integers, build a per-lane
// "lane is non-zero" mask, and report whether that mask is non-empty.
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x) {
Packet16i xi = _mm512_castps_si512(x);
__mmask16 tmp = _mm512_test_epi32_mask(xi, xi);
return !_mm512_kortestz(tmp, tmp);
// Presumably the replacement added by the commit: OR all sixteen 32-bit lanes
// of the reinterpreted packet together and report whether any bit survived.
EIGEN_STRONG_INLINE bool predux_any(const Packet16f& a) {
return _mm512_reduce_or_epi32(_mm512_castps_si512(a)) != 0;
}
// NOTE(review): this is a diff rendering whose +/- markers were lost, so two
// versions of predux_any(const Packet16i&) appear back to back. The first
// signature and its two body lines look like the implementation removed by
// the commit: build a per-lane "lane is non-zero" mask and report whether
// that mask is non-empty.
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet16i& x) {
__mmask16 tmp = _mm512_test_epi32_mask(x, x);
return !_mm512_kortestz(tmp, tmp);
// Presumably the replacement added by the commit: OR all sixteen 32-bit lanes
// together and report whether any bit survived.
EIGEN_STRONG_INLINE bool predux_any(const Packet16i& a) {
return _mm512_reduce_or_epi32(a) != 0;
}
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8d& a) {
  // Reinterpret the packet as raw integer bits (a cast, not a numeric
  // conversion), then OR every 64-bit lane into a single scalar.
  const auto raw_bits = _mm512_castpd_si512(a);
  const auto lanes_ored = _mm512_reduce_or_epi64(raw_bits);
  // A non-zero result means at least one bit was set somewhere in the packet.
  return lanes_ored != 0;
}
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8l& a) {
  // OR every 64-bit lane into a single scalar; a non-zero result means at
  // least one bit was set somewhere in the packet.
  const auto lanes_ored = _mm512_reduce_or_epi64(a);
  return lanes_ored != 0;
}
#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \