From 66d073c38e3cd5dad974deea7b3d1d45247ea55b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?= Date: Fri, 9 Aug 2019 15:56:26 -0600 Subject: [PATCH 1/5] bug #1718: Add cast to successfully compile with clang on PowerPC Ignoring -Wc11-extensions warnings thrown by clang at Altivec/PacketMath.h --- Eigen/src/Core/arch/AltiVec/PacketMath.h | 2 +- Eigen/src/Core/util/DisableStupidWarnings.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 4b770d036..f3d374a62 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -452,7 +452,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, con template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) { - return vec_sel(b, a, mask); + return vec_sel(b, a, reinterpret_cast(mask)); } template<> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { return vec_round(a); } diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h index 6c7c2d655..4501d3248 100755 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -44,6 +44,11 @@ #if __clang_major__ >= 3 && __clang_minor__ >= 5 #pragma clang diagnostic ignored "-Wabsolute-value" #endif + #if ( defined(__ALTIVEC__) || defined(__VSX__) ) && __cplusplus < 201103L + // warning: generic selections are a C11-specific feature + // ignoring warnings thrown at vec_ctf in Altivec/PacketMath.h + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif #elif defined __GNUC__ From 4d29aa0294a0d0aa21c41eef687840a5c59bf692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?= Date: Fri, 9 Aug 
2019 15:59:26 -0600 Subject: [PATCH 2/5] Fix offset argument of ploadu/pstoreu for Altivec If no offset is given, then it should be zero. Also passes full address to vec_vsx_ld/st builtins. Removes useless _EIGEN_ALIGNED_PTR & _EIGEN_MASK_ALIGNMENT. Removes unnecessary casts. --- Eigen/src/Core/arch/AltiVec/PacketMath.h | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index f3d374a62..1fef285ce 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -83,15 +83,6 @@ static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 }; static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; -// Mask alignment -#ifdef __PPC64__ -#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0 -#else -#define _EIGEN_MASK_ALIGNMENT 0xfffffff0 -#endif - -#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) - // Handle endianness properly while loading constants // Define global static constants: #ifdef _BIG_ENDIAN @@ -487,12 +478,12 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD - return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from)); + return vec_vsx_ld(0, from); } template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD - return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from)); + return vec_vsx_ld(0, from); } #endif @@ -553,12 +544,12 @@ template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& f template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE - vec_vsx_st(from, (long)to & 15, (int*) _EIGEN_ALIGNED_PTR(to)); + vec_vsx_st(from, 0, to); 
} template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE - vec_vsx_st(from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to)); + vec_vsx_st(from, 0, to); } #endif @@ -1045,7 +1036,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { re template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD - return (Packet2d) vec_vsx_ld((long)from & 15, (const double*) _EIGEN_ALIGNED_PTR(from)); + return vec_vsx_ld(0, from); } template<> EIGEN_STRONG_INLINE Packet2d ploaddup(const double* from) @@ -1059,7 +1050,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ploaddup(const double* from) template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE - vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to)); + vec_vsx_st(from, 0, to); } template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { EIGEN_PPC_PREFETCH(addr); } From 787f6ef0254949380cc6955890eeb9c282c2350f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?= Date: Fri, 9 Aug 2019 16:02:55 -0600 Subject: [PATCH 3/5] Fix packed load/store for PowerPC's VSX The vec_vsx_ld/vec_vsx_st builtins were wrongly used for aligned load/store. In fact, they perform unaligned memory access and, even when the address is 16-byte aligned, they are much slower (at least 2x) than their aligned counterparts. For double/Packet2d vec_xl/vec_xst should be preferred over vec_ld/vec_st, although the latter works when cast to float/Packet4f. Silencing some weird warnings thrown by some GCC versions. Such warnings are not thrown by Clang. 
--- Eigen/src/Core/arch/AltiVec/PacketMath.h | 40 +++++++++--------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 1fef285ce..30694d424 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -240,42 +240,38 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v) // Need to define them first or we get specialization after instantiation errors template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { + // some versions of GCC throw "unused-but-set-parameter". + // ignoring these warnings for now. + EIGEN_UNUSED_VARIABLE(from); EIGEN_DEBUG_ALIGNED_LOAD -#ifdef __VSX__ - return vec_vsx_ld(0, from); -#else return vec_ld(0, from); -#endif } template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { + // some versions of GCC throw "unused-but-set-parameter". + // ignoring these warnings for now. + EIGEN_UNUSED_VARIABLE(from); EIGEN_DEBUG_ALIGNED_LOAD -#ifdef __VSX__ - return vec_vsx_ld(0, from); -#else return vec_ld(0, from); -#endif } template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { + // some versions of GCC throw "unused-but-set-parameter" (float *to). + // ignoring these warnings for now. + EIGEN_UNUSED_VARIABLE(to); EIGEN_DEBUG_ALIGNED_STORE -#ifdef __VSX__ - vec_vsx_st(from, 0, to); -#else vec_st(from, 0, to); -#endif } template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { + // some versions of GCC throw "unused-but-set-parameter" (int *to). + // ignoring these warnings for now. 
+ EIGEN_UNUSED_VARIABLE(to); EIGEN_DEBUG_ALIGNED_STORE -#ifdef __VSX__ - vec_vsx_st(from, 0, to); -#else vec_st(from, 0, to); -#endif } template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { @@ -940,21 +936,13 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v) template<> EIGEN_STRONG_INLINE Packet2d pload(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD -#ifdef __VSX__ - return vec_vsx_ld(0, from); -#else - return vec_ld(0, from); -#endif + return vec_xl(0, const_cast(from)); // cast needed by Clang } template<> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE -#ifdef __VSX__ - vec_vsx_st(from, 0, to); -#else - vec_st(from, 0, to); -#endif + vec_xst(from, 0, to); } template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { From db9147ae40695e43ec694b2e207d0acc5b0570d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?= Date: Wed, 14 Aug 2019 10:37:39 -0600 Subject: [PATCH 4/5] Add missing pcmp_XX methods for double/Packet2d This actually fixes an issue in unit-test packetmath_2 with pcmp_eq when it is compiled with clang. When pcmp_eq(Packet4f,Packet4f) is used instead of pcmp_eq(Packet2d,Packet2d), the unit-test does not pass due to NaN on ref vector. 
--- Eigen/src/Core/arch/AltiVec/PacketMath.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 30694d424..521e6076d 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -1009,6 +1009,14 @@ template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const return ret; } +template<> EIGEN_STRONG_INLINE Packet2d pcmp_le(const Packet2d& a, const Packet2d& b) { return reinterpret_cast(vec_cmple(a,b)); } +template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt(const Packet2d& a, const Packet2d& b) { return reinterpret_cast(vec_cmplt(a,b)); } +template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return reinterpret_cast(vec_cmpeq(a,b)); } +template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) { + Packet2d c = reinterpret_cast(vec_cmpge(a,b)); + return vec_nor(c,c); +} + template<> EIGEN_STRONG_INLINE Packet2d pand(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet2d por(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); } From 5ac7984ffa2076cc5b26fb220a3b351951251c2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?= Date: Wed, 14 Aug 2019 11:59:12 -0600 Subject: [PATCH 5/5] Fix debug macros in p{load,store}u --- Eigen/src/Core/arch/AltiVec/PacketMath.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 521e6076d..7ee290a29 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -539,12 +539,12 @@ template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& f // We also need to redefine little endian loading of Packet4i/Packet4f using VSX template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const 
Packet4i& from) { - EIGEN_DEBUG_ALIGNED_STORE + EIGEN_DEBUG_UNALIGNED_STORE vec_vsx_st(from, 0, to); } template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { - EIGEN_DEBUG_ALIGNED_STORE + EIGEN_DEBUG_UNALIGNED_STORE vec_vsx_st(from, 0, to); } #endif @@ -1031,7 +1031,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { re template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) { - EIGEN_DEBUG_ALIGNED_LOAD + EIGEN_DEBUG_UNALIGNED_LOAD return vec_vsx_ld(0, from); } @@ -1045,7 +1045,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ploaddup(const double* from) template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& from) { - EIGEN_DEBUG_ALIGNED_STORE + EIGEN_DEBUG_UNALIGNED_STORE vec_vsx_st(from, 0, to); }