Patch summary (free text ahead of the "diff --git" header; not part of any hunk):
  Removes the __GNUC__-only pset1 specializations for Packet4f and Packet2d
  (_mm_set_ss / _mm_set_sd followed by a swizzle) together with the surrounding
  #ifdef/#else/#endif, so the _mm_set1_ps / _mm_set1_pd versions are used
  unconditionally.
NOTE(review): this hunk was rebuilt from a copy flattened onto one line.
  - Line breaks follow the hunk header counts (22 old lines, 8 new lines);
    the positions of the two blank context lines are inferred.
  - Template argument lists had been stripped. They are restored where the
    visible return/parameter types determine them (unpacket_traits, pset1).
    The argument list of plset could not be recovered and is left as found;
    confirm against the target file.
  - Column-alignment whitespace was lost; apply with whitespace-insensitive
    matching or re-generate the patch from the repository.
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index dea744396..43ad28d42 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -110,22 +110,8 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}
 template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
 template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
 
-#ifdef __GNUC__
-// Sometimes GCC implements _mm_set1_p* using multiple moves,
-// that is inefficient :( (e.g., see gemm_pack_rhs)
-template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
-  Packet4f res = _mm_set_ss(from);
-  return vec4f_swizzle1(res,0,0,0,0);
-}
-template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
-  // NOTE the SSE3 intrinsic _mm_loaddup_pd is never faster but sometimes much slower
-  Packet2d res = _mm_set_sd(from);
-  return vec2d_swizzle1(res, 0, 0);
-}
-#else
 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
-#endif
 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
 
 template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }