diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 30878eda6..7979c3aff 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -324,13 +324,13 @@ preduxp(const Packet* vecs) { return vecs[0]; } template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux(const Packet& a) { return a; } -/** \internal \returns the sum of the elements of \a a by block of 4 elements. +/** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4. * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7} * For packet-size smaller or equal to 4, this boils down to a noop. */ template EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet>::type -predux_downto4(const Packet& a) +predux_half_dowto4(const Packet& a) { return a; } /** \internal \returns the product of the elements of \a a*/ diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 636230944..af31023f0 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -412,7 +412,7 @@ template<> EIGEN_STRONG_INLINE double predux(const Packet4d& a) return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1)))); } -template<> EIGEN_STRONG_INLINE Packet4f predux_downto4(const Packet8f& a) +template<> EIGEN_STRONG_INLINE Packet4f predux_half_dowto4(const Packet8f& a) { return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1)); } diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index abece01bc..eb5de43d4 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -888,7 +888,7 @@ EIGEN_STRONG_INLINE double predux(const Packet8d& a) { } template <> -EIGEN_STRONG_INLINE Packet8f predux_downto4(const Packet16f& a) { +EIGEN_STRONG_INLINE Packet8f predux_half_dowto4(const Packet16f& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ __m256 lane0 = _mm512_extractf32x8_ps(a, 0); __m256 lane1 = _mm512_extractf32x8_ps(a, 1); @@ -904,7 +904,7 @@ EIGEN_STRONG_INLINE Packet8f predux_downto4(const Packet16f& a) { #endif } template <> -EIGEN_STRONG_INLINE Packet4d predux_downto4(const Packet8d& a) { +EIGEN_STRONG_INLINE Packet4d predux_half_dowto4(const Packet8d& a) { __m256d lane0 = _mm512_extractf64x4_pd(a, 0); __m256d lane1 = _mm512_extractf64x4_pd(a, 1); __m256d res = _mm256_add_pd(lane0, lane1); diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 9072d0ff3..dd4f366ee 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -580,7 +580,7 @@ DoublePacket padd(const DoublePacket &a, const DoublePacket -const DoublePacket& predux_downto4(const DoublePacket &a) +const DoublePacket& predux_half_dowto4(const DoublePacket &a) { return a; } @@ -1596,13 +1596,13 @@ void gebp_kernel void packetmath() ref[i] = 0; for (int i=0; i(data1))); - VERIFY(areApprox(ref, data2, HalfPacketSize) && "internal::predux_downto4"); + internal::pstore(data2, internal::predux_half_dowto4(internal::pload(data1))); + VERIFY(areApprox(ref, data2, HalfPacketSize) && "internal::predux_half_dowto4"); } ref[0] = 1;