diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h index 93c5ec43f..aac707596 100644 --- a/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -373,6 +373,19 @@ EIGEN_STRONG_INLINE Packet16f prsqrt(const Packet16f& x) { #endif #endif + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f +psin(const Packet16f& _x) { + return psin_float(_x); +} + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f +pcos(const Packet16f& _x) { + return pcos_float(_x); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 9a053fb1a..7d90ce4c1 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -55,6 +55,8 @@ template<> struct packet_traits : default_packet_traits size = 16, HasHalfPacket = 1, HasBlend = 0, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, #if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG #ifdef EIGEN_VECTORIZE_AVX512DQ HasLog = 1, @@ -99,6 +101,7 @@ template <> struct unpacket_traits { typedef float type; typedef Packet8f half; + typedef Packet16i integer_packet; enum { size = 16, alignment=Aligned64 }; }; template <> @@ -127,6 +130,11 @@ EIGEN_STRONG_INLINE Packet16i pset1(const int& from) { return _mm512_set1_epi32(from); } +template <> +EIGEN_STRONG_INLINE Packet16f pset1frombits(unsigned int from) { + return _mm512_castsi512_ps(_mm512_set1_epi32(from)); +} + template <> EIGEN_STRONG_INLINE Packet16f pload1(const float* from) { return _mm512_broadcastss_ps(_mm_load_ps1(from)); @@ -254,6 +262,12 @@ EIGEN_STRONG_INLINE Packet8d pmax(const Packet8d& a, return _mm512_max_pd(b, a); } +template<> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) { + __m256i lo = _mm256_cmpeq_epi32(_mm512_extracti64x4_epi64(a, 0), _mm512_extracti64x4_epi64(b, 0)); + __m256i hi = _mm256_cmpeq_epi32(_mm512_extracti64x4_epi64(a, 1), _mm512_extracti64x4_epi64(b, 1)); + return _mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1); +} + template <> EIGEN_STRONG_INLINE Packet16f pand(const Packet16f& a, const Packet16f& b) { @@ -434,6 +448,10 @@ EIGEN_STRONG_INLINE Packet8d pandnot(const Packet8d& a, #endif } +template EIGEN_STRONG_INLINE Packet16i pshiftleft(Packet16i a) { + return _mm512_slli_epi32(a, N); +} + template <> EIGEN_STRONG_INLINE Packet16f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ps(from); @@ -1322,6 +1340,22 @@ template<> EIGEN_STRONG_INLINE Packet8d pinsertlast(const Packet8d& a, double b) return _mm512_mask_broadcastsd_pd(a, (1<<7), _mm_load_sd(&b)); } +template<> EIGEN_STRONG_INLINE Packet16i pcast(const Packet16f& a) { + return _mm512_cvttps_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet16f pcast(const Packet16i& a) { + return _mm512_cvtepi32_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet16i preinterpret(const Packet16f& a) { + return _mm512_castps_si512(a); +} + +template<> EIGEN_STRONG_INLINE Packet16f preinterpret(const Packet16i& a) { + return _mm512_castsi512_ps(a); +} + } // end namespace internal } // end namespace Eigen