From 85b6d82b49c636364d92b732aece58949df6741d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 1 Feb 2016 14:35:51 -0800 Subject: [PATCH] Generalized predux4 to support AVX512 packets, and renamed it predux_half. Disabled the implementation of pabs for avx512 since the corresponding intrinsics are not shipped with gcc --- Eigen/src/Core/GenericPacketMath.h | 2 +- Eigen/src/Core/arch/AVX/PacketMath.h | 2 +- Eigen/src/Core/arch/AVX512/PacketMath.h | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 5f27d8166..d51413e98 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -314,7 +314,7 @@ template EIGEN_DEVICE_FUNC inline typename unpacket_traits EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet>::type -predux4(const Packet& a) +predux_half(const Packet& a) { return a; } /** \internal \returns the product of the elements of \a a*/ diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 5ec325fce..7161f3867 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -401,7 +401,7 @@ template<> EIGEN_STRONG_INLINE double predux(const Packet4d& a) return pfirst(_mm256_hadd_pd(tmp0,tmp0)); } -template<> EIGEN_STRONG_INLINE Packet4f predux4(const Packet8f& a) +template<> EIGEN_STRONG_INLINE Packet4f predux_half(const Packet8f& a) { return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1)); } diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index d3b1eea06..55d93e35b 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -633,11 +633,13 @@ template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a) template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a) { - return _mm512_abs_ps(a); + assert(false && "to be implemented"); + // return _mm512_abs_ps(a); } template<> EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { - return _mm512_abs_pd(a); + assert(false && "to be implemented"); + // return _mm512_abs_pd(a); } template<> EIGEN_STRONG_INLINE Packet16f preduxp(const Packet16f* vecs) @@ -679,7 +681,7 @@ EIGEN_STRONG_INLINE double predux(const Packet8d& a) { } template <> -EIGEN_STRONG_INLINE Packet8f predux4(const Packet16f& a) { +EIGEN_STRONG_INLINE Packet8f predux_half(const Packet16f& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); @@ -695,7 +697,7 @@ EIGEN_STRONG_INLINE Packet8f predux4(const Packet16f& a) { #endif } template <> -EIGEN_STRONG_INLINE Packet4d predux4(const Packet8d& a) { +EIGEN_STRONG_INLINE Packet4d predux_half(const Packet8d& a) { Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); Packet4d res = padd(lane0, lane1);