From ced1a45f82684ab18cbe4d830f6832407cabadb3 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Tue, 20 Jul 2010 14:24:01 +0200
Subject: [PATCH] add NEON ploaddup and pcplxflip functions

---
Review notes (ignored by git-am):
 - ei_pcplxflip now reads its own parameter `x` (the body referenced an undeclared `a`).
 - ei_ploaddup declares `hi` (was misspelled `ho`), the Packet4i overload takes `const int*`,
   and the high half is built from from[1] so the result is {from[0], from[0], from[1], from[1]}.

 Eigen/src/Core/arch/NEON/Complex.h    |  5 +++++
 Eigen/src/Core/arch/NEON/PacketMath.h | 27 +++++++++++++++++++--------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 6d9e8da85..9678040e7 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -141,6 +141,11 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a)
   return Packet2cf(a_r128);
 }
 
+EIGEN_STRONG_INLINE Packet2cf ei_pcplxflip/**/(const Packet2cf& x)
+{
+  return Packet2cf(vrev64q_f32(x.v)); // reverse the two floats inside each 64-bit lane of x.v
+}
+
 template<> EIGEN_STRONG_INLINE std::complex ei_predux(const Packet2cf& a)
 {
   float32x2_t a1, a2;
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index b899fece1..8220ed07c 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -180,6 +180,21 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pload(const int* from) { EIG
 template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
 template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
 
+template<> EIGEN_STRONG_INLINE Packet4f ei_ploaddup(const float* from)
+{
+  float32x2_t lo, hi;
+  lo = vdup_n_f32(from[0]); // low half:  {from[0], from[0]}
+  hi = vdup_n_f32(from[1]); // high half: {from[1], from[1]}
+  return vcombine_f32(lo, hi);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ei_ploaddup(const int* from)
+{
+  int32x2_t lo, hi;
+  lo = vdup_n_s32(from[0]); // low half:  {from[0], from[0]}
+  hi = vdup_n_s32(from[1]); // high half: {from[1], from[1]}
+  return vcombine_s32(lo, hi);
+}
+
 template<> EIGEN_STRONG_INLINE void ei_pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
 template<> EIGEN_STRONG_INLINE void ei_pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
 
@@ -195,25 +210,21 @@ template<> EIGEN_STRONG_INLINE int ei_pfirst(const Packet4i& a) { i
 
 template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) {
   float32x2_t a_lo, a_hi;
-  Packet4f a_r64, a_r128;
+  Packet4f a_r64;
 
   a_r64 = vrev64q_f32(a);
   a_lo = vget_low_f32(a_r64);
   a_hi = vget_high_f32(a_r64);
-  a_r128 = vcombine_f32(a_hi, a_lo);
-
-  return a_r128;
+  return vcombine_f32(a_hi, a_lo);
 }
 template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) {
   int32x2_t a_lo, a_hi;
-  Packet4i a_r64, a_r128;
+  Packet4i a_r64;
 
   a_r64 = vrev64q_s32(a);
   a_lo = vget_low_s32(a_r64);
   a_hi = vget_high_s32(a_r64);
-  a_r128 = vcombine_s32(a_hi, a_lo);
-
-  return a_r128;
+  return vcombine_s32(a_hi, a_lo);
 }
 template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vabsq_f32(a); }
 template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vabsq_s32(a); }