From fb95e90f7faa0926029727e7dfde98993f2c9e00 Mon Sep 17 00:00:00 2001 From: Charles Schlosser Date: Mon, 29 Apr 2024 23:45:49 +0000 Subject: [PATCH] Add truncation op --- Eigen/src/Core/GenericPacketMath.h | 43 +++--- Eigen/src/Core/GlobalFunctions.h | 7 +- Eigen/src/Core/MathFunctions.h | 43 +++--- Eigen/src/Core/arch/AVX/PacketMath.h | 39 +++--- Eigen/src/Core/arch/AVX512/PacketMath.h | 37 +++--- Eigen/src/Core/arch/AVX512/PacketMathFP16.h | 13 +- Eigen/src/Core/arch/AltiVec/PacketMath.h | 26 ++-- Eigen/src/Core/arch/Default/BFloat16.h | 1 + .../arch/Default/GenericPacketMathFunctions.h | 89 +++++++++++++ .../Default/GenericPacketMathFunctionsFwd.h | 15 +++ Eigen/src/Core/arch/Default/Half.h | 1 + Eigen/src/Core/arch/GPU/PacketMath.h | 3 +- Eigen/src/Core/arch/HVX/PacketMath.h | 3 - Eigen/src/Core/arch/MSA/PacketMath.h | 6 - Eigen/src/Core/arch/NEON/PacketMath.h | 122 +++++++----------- Eigen/src/Core/arch/SSE/PacketMath.h | 81 +----------- Eigen/src/Core/arch/SVE/PacketMath.h | 1 - Eigen/src/Core/arch/ZVector/PacketMath.h | 6 - Eigen/src/Core/functors/UnaryFunctors.h | 26 +++- Eigen/src/plugins/ArrayCwiseUnaryOps.inc | 10 ++ doc/snippets/Cwise_trunc.cpp | 3 + test/array_cwise.cpp | 9 +- test/packetmath.cpp | 16 ++- 23 files changed, 333 insertions(+), 267 deletions(-) create mode 100644 doc/snippets/Cwise_trunc.cpp diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index fc5d7570c..381d8fff5 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -57,6 +57,9 @@ struct default_packet_traits { HasConj = 1, HasSetLinear = 1, HasSign = 1, + // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet + // types + HasRound = 1, HasArg = 0, HasAbsDiff = 0, @@ -64,10 +67,6 @@ struct default_packet_traits { // This flag is used to indicate whether packet comparison is supported. 
// pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true. HasCmp = 0, - HasRound = 0, - HasRint = 0, - HasFloor = 0, - HasCeil = 0, HasDiv = 0, HasReciprocal = 0, @@ -1138,33 +1137,45 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& return numext::cbrt(a); } +template ::value, + bool IsInteger = NumTraits::type>::IsInteger> +struct nearest_integer_packetop_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); } +}; + /** \internal \returns the rounded value of \a a (coeff-wise) */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pround(const Packet& a) { - using numext::round; - return round(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) { + return nearest_integer_packetop_impl::run_round(a); } /** \internal \returns the floor of \a a (coeff-wise) */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pfloor(const Packet& a) { - using numext::floor; - return floor(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) { + return nearest_integer_packetop_impl::run_floor(a); } /** \internal \returns the rounded value of \a a (coeff-wise) with current * rounding mode */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet print(const Packet& a) { - using numext::rint; - return rint(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) { + return nearest_integer_packetop_impl::run_rint(a); } /** \internal \returns the 
ceil of \a a (coeff-wise) */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet& a) { - using numext::ceil; - return ceil(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) { + return nearest_integer_packetop_impl::run_ceil(a); +} + +/** \internal \returns the truncation of \a a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) { + return nearest_integer_packetop_impl::run_trunc(a); } template diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index f0ae5a856..3f147b8f6 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -98,9 +98,12 @@ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rint, scalar_rint_op, EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round, scalar_round_op, nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( - floor, scalar_floor_op, nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) + floor, scalar_floor_op, nearest integer not greater than the given value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( - ceil, scalar_ceil_op, nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) + ceil, scalar_ceil_op, nearest integer not less than the given value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(trunc, scalar_trunc_op, + nearest integer not greater in magnitude than the given value,\sa Eigen::floor DOXCOMMA + ArrayBase::trunc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( isnan, scalar_isnan_op, not -a - number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 2a42b1864..6bb9a1202 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -894,6 +894,9 @@ struct 
nearest_integer_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_round(const Scalar& x) { EIGEN_USING_STD(round) return round(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { + EIGEN_USING_STD(trunc) return trunc(x); + } }; template struct nearest_integer_impl { @@ -901,6 +904,7 @@ struct nearest_integer_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_ceil(const Scalar& x) { return x; } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_rint(const Scalar& x) { return x; } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_round(const Scalar& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { return x; } }; } // end namespace internal @@ -1192,17 +1196,26 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar round(const Scalar& x) { return internal::nearest_integer_impl::run_round(x); } -#if defined(SYCL_DEVICE_ONLY) -SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round) -#endif - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(floor)(const Scalar& x) { return internal::nearest_integer_impl::run_floor(x); } +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(ceil)(const Scalar& x) { + return internal::nearest_integer_impl::run_ceil(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(trunc)(const Scalar& x) { + return internal::nearest_integer_impl::run_trunc(x); +} + #if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(trunc, trunc) #endif #if defined(EIGEN_GPUCC) @@ -1210,32 +1223,26 @@ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float& x) { return ::floorf(x); } - template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double floor(const double& x) { return ::floor(x); } -#endif - -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
Scalar(ceil)(const Scalar& x) { - return internal::nearest_integer_impl::run_ceil(x); -} - -#if defined(SYCL_DEVICE_ONLY) -SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil) -#endif - -#if defined(EIGEN_GPUCC) template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float& x) { return ::ceilf(x); } - template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double ceil(const double& x) { return ::ceil(x); } +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float trunc(const float& x) { + return ::truncf(x); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double trunc(const double& x) { + return ::trunc(x); +} #endif // Integer division with rounding up. diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index dac43fcd8..b05429cfe 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -124,11 +124,7 @@ struct packet_traits : default_packet_traits { HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, - HasBlend = 1, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1 + HasBlend = 1 }; }; template <> @@ -151,11 +147,7 @@ struct packet_traits : default_packet_traits { HasSqrt = 1, HasRsqrt = 1, HasATan = 1, - HasBlend = 1, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1 + HasBlend = 1 }; }; @@ -192,10 +184,6 @@ struct packet_traits : default_packet_traits { HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasBlend = 0, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, HasBessel = 1, HasNdtri = 1 }; @@ -235,10 +223,6 @@ struct packet_traits : default_packet_traits { HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasBlend = 0, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, HasBessel = 1, HasNdtri = 1 }; @@ -1257,6 +1241,15 @@ EIGEN_STRONG_INLINE Packet4d pfloor(const Packet4d& a) { return _mm256_floor_pd(a); } +template <> +EIGEN_STRONG_INLINE Packet8f ptrunc(const Packet8f& a) { + return _mm256_round_ps(a, 
_MM_FROUND_TRUNC); +} +template <> +EIGEN_STRONG_INLINE Packet4d ptrunc(const Packet4d& a) { + return _mm256_round_pd(a, _MM_FROUND_TRUNC); +} + template <> EIGEN_STRONG_INLINE Packet8i ptrue(const Packet8i& a) { #ifdef EIGEN_VECTORIZE_AVX2 @@ -2311,6 +2304,11 @@ EIGEN_STRONG_INLINE Packet8h pfloor(const Packet8h& a) { return float2half(pfloor(half2float(a))); } +template <> +EIGEN_STRONG_INLINE Packet8h ptrunc(const Packet8h& a) { + return float2half(ptrunc(half2float(a))); +} + template <> EIGEN_STRONG_INLINE Packet8h pcmp_eq(const Packet8h& a, const Packet8h& b) { return Pack16To8(pcmp_eq(half2float(a), half2float(b))); @@ -2686,6 +2684,11 @@ EIGEN_STRONG_INLINE Packet8bf pfloor(const Packet8bf& a) { return F32ToBf16(pfloor(Bf16ToF32(a))); } +template <> +EIGEN_STRONG_INLINE Packet8bf ptrunc(const Packet8bf& a) { + return F32ToBf16(ptrunc(Bf16ToF32(a))); +} + template <> EIGEN_STRONG_INLINE Packet8bf pcmp_eq(const Packet8bf& a, const Packet8bf& b) { return Pack16To8(pcmp_eq(Bf16ToF32(a), Bf16ToF32(b))); diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 8f7662f47..9a0edcacb 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -97,11 +97,7 @@ struct packet_traits : default_packet_traits { HasCos = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, - HasBlend = 0, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1 + HasBlend = 0 }; }; #endif @@ -138,11 +134,7 @@ struct packet_traits : default_packet_traits { HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasCmp = 1, - HasDiv = 1, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1 + HasDiv = 1 }; }; template <> @@ -162,11 +154,7 @@ struct packet_traits : default_packet_traits { HasExp = 1, HasATan = 1, HasCmp = 1, - HasDiv = 1, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1 + HasDiv = 1 }; }; @@ -781,6 +769,15 @@ EIGEN_STRONG_INLINE Packet8d pfloor(const 
Packet8d& a) { return _mm512_roundscale_pd(a, _MM_FROUND_TO_NEG_INF); } +template <> +EIGEN_STRONG_INLINE Packet16f ptrunc(const Packet16f& a) { + return _mm512_roundscale_ps(a, _MM_FROUND_TO_ZERO); +} +template <> +EIGEN_STRONG_INLINE Packet8d ptrunc(const Packet8d& a) { + return _mm512_roundscale_pd(a, _MM_FROUND_TO_ZERO); +} + template <> EIGEN_STRONG_INLINE Packet16i ptrue(const Packet16i& /*a*/) { return _mm512_set1_epi32(int32_t(-1)); @@ -2322,6 +2319,11 @@ EIGEN_STRONG_INLINE Packet16h pfloor(const Packet16h& a) { return float2half(pfloor(half2float(a))); } +template <> +EIGEN_STRONG_INLINE Packet16h ptrunc(const Packet16h& a) { + return float2half(ptrunc(half2float(a))); +} + template <> EIGEN_STRONG_INLINE Packet16h pcmp_eq(const Packet16h& a, const Packet16h& b) { Packet16f af = half2float(a); @@ -2821,6 +2823,11 @@ EIGEN_STRONG_INLINE Packet16bf pfloor(const Packet16bf& a) { return F32ToBf16(pfloor(Bf16ToF32(a))); } +template <> +EIGEN_STRONG_INLINE Packet16bf ptrunc(const Packet16bf& a) { + return F32ToBf16(ptrunc(Bf16ToF32(a))); +} + template <> EIGEN_STRONG_INLINE Packet16bf pcmp_eq(const Packet16bf& a, const Packet16bf& b) { return Pack32To16(pcmp_eq(Bf16ToF32(a), Bf16ToF32(b))); diff --git a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h index 131e6f168..d4a5816ab 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +++ b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h @@ -60,11 +60,7 @@ struct packet_traits : default_packet_traits { HasCos = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, HasErf = 0, // EIGEN_FAST_MATH, - HasBlend = 0, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1 + HasBlend = 0 }; }; @@ -390,6 +386,13 @@ EIGEN_STRONG_INLINE Packet32h pfloor(const Packet32h& a) { return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF); } +// ptrunc + +template <> +EIGEN_STRONG_INLINE Packet32h ptrunc(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_TO_ZERO); +} + // predux 
template <> EIGEN_STRONG_INLINE half predux(const Packet32h& a) { diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index b0f7262ff..4c92e05b0 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -193,17 +193,12 @@ struct packet_traits : default_packet_traits { #endif HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, - HasRint = 1, #else HasSqrt = 0, HasRsqrt = 0, HasTanh = 0, HasErf = 0, - HasRint = 0, #endif - HasRound = 1, - HasFloor = 1, - HasCeil = 1, HasNegate = 1, HasBlend = 1 }; @@ -235,17 +230,12 @@ struct packet_traits : default_packet_traits { #else HasRsqrt = 0, #endif - HasRint = 1, #else HasSqrt = 0, HasRsqrt = 0, - HasRint = 0, #endif HasTanh = 0, HasErf = 0, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, HasNegate = 1, HasBlend = 1 }; @@ -1506,6 +1496,10 @@ template <> EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) { return vec_floor(a); } +template <> +EIGEN_STRONG_INLINE Packet4f ptrunc(const Packet4f& a) { + return vec_trunc(a); +} #ifdef EIGEN_VECTORIZE_VSX template <> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) { @@ -2364,6 +2358,10 @@ template <> EIGEN_STRONG_INLINE Packet8bf pround(const Packet8bf& a) { BF16_TO_F32_UNARY_OP_WRAPPER(pround, a); } +template <> +EIGEN_STRONG_INLINE Packet8bf ptrunc(const Packet8bf& a) { + BF16_TO_F32_UNARY_OP_WRAPPER(ptrunc, a); +} #ifdef EIGEN_VECTORIZE_VSX template <> EIGEN_STRONG_INLINE Packet8bf print(const Packet8bf& a) { @@ -3189,10 +3187,6 @@ struct packet_traits : default_packet_traits { #else HasRsqrt = 0, #endif - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, HasNegate = 1, HasBlend = 1 }; @@ -3446,6 +3440,10 @@ EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { return vec_floor(a); } template <> +EIGEN_STRONG_INLINE Packet2d ptrunc(const Packet2d& a) { + return vec_trunc(a); +} +template <> EIGEN_STRONG_INLINE Packet2d print(const Packet2d& a) { Packet2d res; diff 
--git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h index be44909a4..f31c6cee6 100644 --- a/Eigen/src/Core/arch/Default/BFloat16.h +++ b/Eigen/src/Core/arch/Default/BFloat16.h @@ -637,6 +637,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) { return EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) { return bfloat16(::ceilf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) { return bfloat16(::rintf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) { return bfloat16(::roundf(float(a))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 trunc(const bfloat16& a) { return bfloat16(::truncf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) { return bfloat16(::fmodf(float(a), float(b))); } diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 16ca80728..537dffe9a 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -2469,6 +2469,95 @@ struct unary_pow_impl { } }; +template +EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a) { + using Scalar = typename unpacket_traits::type; + using IntType = typename numext::get_integer_by_size::signed_type; + // Adds and subtracts signum(a) * 2^kMantissaBits to force rounding. + const IntType kLimit = IntType(1) << (NumTraits::digits() - 1); + const Packet cst_limit = pset1(static_cast(kLimit)); + Packet abs_a = pabs(a); + Packet sign_a = pandnot(a, abs_a); + Packet rint_a = padd(abs_a, cst_limit); + // Don't compile-away addition and subtraction. + EIGEN_OPTIMIZATION_BARRIER(rint_a); + rint_a = psub(rint_a, cst_limit); + rint_a = por(rint_a, sign_a); + // If greater than limit (or NaN), simply return a. 
+ Packet mask = pcmp_lt(abs_a, cst_limit); + Packet result = pselect(mask, rint_a, a); + return result; +} + +template +EIGEN_STRONG_INLINE Packet generic_floor(const Packet& a) { + using Scalar = typename unpacket_traits::type; + const Packet cst_1 = pset1(Scalar(1)); + Packet rint_a = generic_rint(a); + // if a < rint(a), then rint(a) == ceil(a) + Packet mask = pcmp_lt(a, rint_a); + Packet offset = pand(cst_1, mask); + Packet result = psub(rint_a, offset); + return result; +} + +template +EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a) { + using Scalar = typename unpacket_traits::type; + const Packet cst_1 = pset1(Scalar(1)); + Packet rint_a = generic_rint(a); + // if rint(a) < a, then rint(a) == floor(a) + Packet mask = pcmp_lt(rint_a, a); + Packet offset = pand(cst_1, mask); + Packet result = padd(rint_a, offset); + return result; +} + +template +EIGEN_STRONG_INLINE Packet generic_trunc(const Packet& a) { + Packet abs_a = pabs(a); + Packet sign_a = pandnot(a, abs_a); + Packet floor_abs_a = generic_floor(abs_a); + Packet result = por(floor_abs_a, sign_a); + return result; +} + +template +EIGEN_STRONG_INLINE Packet generic_round(const Packet& a) { + using Scalar = typename unpacket_traits::type; + const Packet cst_half = pset1(Scalar(0.5)); + const Packet cst_1 = pset1(Scalar(1)); + Packet abs_a = pabs(a); + Packet sign_a = pandnot(a, abs_a); + Packet floor_abs_a = generic_floor(abs_a); + Packet diff = psub(abs_a, floor_abs_a); + Packet mask = pcmp_le(cst_half, diff); + Packet offset = pand(cst_1, mask); + Packet result = padd(floor_abs_a, offset); + result = por(result, sign_a); + return result; +} + +template +struct nearest_integer_packetop_impl { + using Scalar = typename unpacket_traits::type; + static_assert(packet_traits::HasRound, "Generic nearest integer functions are disabled for this type."); + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return generic_floor(x); } + static EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return generic_ceil(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return generic_rint(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return generic_round(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return generic_trunc(x); } +}; + +template +struct nearest_integer_packetop_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return x; } +}; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h index 05cac5cb6..41dc068fc 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -133,6 +133,21 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_complex(const Pa template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Packet& x); +template +EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a); + +template +EIGEN_STRONG_INLINE Packet generic_floor(const Packet& a); + +template +EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a); + +template +EIGEN_STRONG_INLINE Packet generic_trunc(const Packet& a); + +template +EIGEN_STRONG_INLINE Packet generic_round(const Packet& a); + // Macros for instantiating these generic functions for different backends. 
#define EIGEN_PACKET_FUNCTION(METHOD, SCALAR, PACKET) \ template <> \ diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h index 17d534dc8..9c195c12a 100644 --- a/Eigen/src/Core/arch/Default/Half.h +++ b/Eigen/src/Core/arch/Default/Half.h @@ -722,6 +722,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half rint(const half& a) { return half(::rintf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half round(const half& a) { return half(::roundf(float(a))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half trunc(const half& a) { return half(::truncf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half fmod(const half& a, const half& b) { return half(::fmodf(float(a), float(b))); } diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h index 7900b0e3b..352c8f5bf 100644 --- a/Eigen/src/Core/arch/GPU/PacketMath.h +++ b/Eigen/src/Core/arch/GPU/PacketMath.h @@ -75,8 +75,7 @@ struct packet_traits : default_packet_traits { HasIGammac = 1, HasBetaInc = 1, - HasBlend = 0, - HasFloor = 1, + HasBlend = 0 }; }; diff --git a/Eigen/src/Core/arch/HVX/PacketMath.h b/Eigen/src/Core/arch/HVX/PacketMath.h index 7e139de13..ccba96efd 100644 --- a/Eigen/src/Core/arch/HVX/PacketMath.h +++ b/Eigen/src/Core/arch/HVX/PacketMath.h @@ -161,9 +161,6 @@ struct packet_traits : default_packet_traits { HasBlend = 0, HasDiv = 0, - HasFloor = 0, - HasCeil = 0, - HasRint = 0, HasSin = 0, HasCos = 0, diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h index c1843c30a..81da24f8d 100644 --- a/Eigen/src/Core/arch/MSA/PacketMath.h +++ b/Eigen/src/Core/arch/MSA/PacketMath.h @@ -91,9 +91,6 @@ struct packet_traits : default_packet_traits { HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, HasBlend = 1 }; }; @@ -859,9 +856,6 @@ struct packet_traits : default_packet_traits { HasExp = 1, HasSqrt = 1, HasRsqrt = 1, 
- HasRound = 1, - HasFloor = 1, - HasCeil = 1, HasBlend = 1 }; }; diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 2c18b5dc5..50cf56f0e 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -196,12 +196,7 @@ struct packet_traits : default_packet_traits { HasConj = 1, HasSetLinear = 1, HasBlend = 0, - HasDiv = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, - HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasACos = 1, @@ -4470,76 +4465,25 @@ EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { return vrndpq_f32(a); } -#else - template <> -EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) { - // Adds and subtracts signum(a) * 2^23 to force rounding. - const Packet4f limit = pset1(static_cast(1 << 23)); - const Packet4f abs_a = pabs(a); - Packet4f r = padd(abs_a, limit); - // Don't compile-away addition and subtraction. - EIGEN_OPTIMIZATION_BARRIER(r); - r = psub(r, limit); - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; +EIGEN_STRONG_INLINE Packet2f pround(const Packet2f& a) { + return vrnda_f32(a); } template <> -EIGEN_STRONG_INLINE Packet2f print(const Packet2f& a) { - // Adds and subtracts signum(a) * 2^23 to force rounding. - const Packet2f limit = pset1(static_cast(1 << 23)); - const Packet2f abs_a = pabs(a); - Packet2f r = padd(abs_a, limit); - // Don't compile-away addition and subtraction. - EIGEN_OPTIMIZATION_BARRIER(r); - r = psub(r, limit); - // If greater than limit, simply return a. Otherwise, account for sign. 
- r = pselect(pcmp_lt(abs_a, limit), pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; +EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { + return vrndaq_f32(a); } template <> -EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) { - const Packet4f cst_1 = pset1(1.0f); - Packet4f tmp = print(a); - // If greater, subtract one. - Packet4f mask = pcmp_lt(a, tmp); - mask = pand(mask, cst_1); - return psub(tmp, mask); +EIGEN_STRONG_INLINE Packet2f ptrunc(const Packet2f& a) { + return vrnd_f32(a); } template <> -EIGEN_STRONG_INLINE Packet2f pfloor(const Packet2f& a) { - const Packet2f cst_1 = pset1(1.0f); - Packet2f tmp = print(a); - // If greater, subtract one. - Packet2f mask = pcmp_lt(a, tmp); - mask = pand(mask, cst_1); - return psub(tmp, mask); +EIGEN_STRONG_INLINE Packet4f ptrunc(const Packet4f& a) { + return vrndq_f32(a); } - -template <> -EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { - const Packet4f cst_1 = pset1(1.0f); - Packet4f tmp = print(a); - // If smaller, add one. - Packet4f mask = pcmp_lt(tmp, a); - mask = pand(mask, cst_1); - return padd(tmp, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet2f pceil(const Packet2f& a) { - const Packet2f cst_1 = pset1(1.0); - Packet2f tmp = print(a); - // If smaller, add one. 
- Packet2f mask = pcmp_lt(tmp, a); - mask = pand(mask, cst_1); - return padd(tmp, mask); -} - #endif /** @@ -4800,10 +4744,6 @@ struct packet_traits : default_packet_traits { HasSetLinear = 1, HasBlend = 0, HasDiv = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, - HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasLog = 1, @@ -4983,6 +4923,16 @@ EIGEN_STRONG_INLINE Packet4bf pceil(const Packet4bf& a) { return F32ToBf16(pceil(Bf16ToF32(a))); } +template <> +EIGEN_STRONG_INLINE Packet4bf pround(const Packet4bf& a) { + return F32ToBf16(pround(Bf16ToF32(a))); +} + +template <> +EIGEN_STRONG_INLINE Packet4bf ptrunc(const Packet4bf& a) { + return F32ToBf16(ptrunc(Bf16ToF32(a))); +} + template <> EIGEN_STRONG_INLINE Packet4bf pconj(const Packet4bf& a) { return a; @@ -5168,9 +5118,6 @@ struct packet_traits : default_packet_traits { HasBlend = 0, HasDiv = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG HasExp = 1, @@ -5460,6 +5407,16 @@ EIGEN_STRONG_INLINE Packet2d pceil(const Packet2d& a) { return vrndpq_f64(a); } +template <> +EIGEN_STRONG_INLINE Packet2d pround(const Packet2d& a) { + return vrndaq_f64(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2d ptrunc(const Packet2d& a) { + return vrndq_f64(a); +} + template <> EIGEN_STRONG_INLINE Packet2d pldexp(const Packet2d& a, const Packet2d& exponent) { return pldexp_generic(a, exponent); @@ -5521,9 +5478,6 @@ struct packet_traits : default_packet_traits { HasInsert = 1, HasReduxp = 1, HasDiv = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, HasSin = 0, HasCos = 0, HasLog = 0, @@ -5791,6 +5745,26 @@ EIGEN_STRONG_INLINE Packet4hf pceil(const Packet4hf& a) { return vrndp_f16(a); } +template <> +EIGEN_STRONG_INLINE Packet8hf pround(const Packet8hf& a) { + return vrndaq_f16(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4hf pround(const Packet4hf& a) { + return vrnda_f16(a); +} + +template <> +EIGEN_STRONG_INLINE Packet8hf ptrunc(const Packet8hf& a) { + return 
vrndq_f16(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4hf ptrunc(const Packet4hf& a) { + return vrnd_f16(a); +} + template <> EIGEN_STRONG_INLINE Packet8hf psqrt(const Packet8hf& a) { return vsqrtq_f16(a); diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 7bac3f9c7..e19e9480a 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -198,12 +198,6 @@ struct packet_traits : default_packet_traits { HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasBlend = 1, - HasCeil = 1, - HasFloor = 1, -#ifdef EIGEN_VECTORIZE_SSE4_1 - HasRound = 1, -#endif - HasRint = 1, HasSign = 0 // The manually vectorized version is slightly slower for SSE. }; }; @@ -225,13 +219,7 @@ struct packet_traits : default_packet_traits { HasSqrt = 1, HasRsqrt = 1, HasATan = 1, - HasBlend = 1, - HasFloor = 1, - HasCeil = 1, -#ifdef EIGEN_VECTORIZE_SSE4_1 - HasRound = 1, -#endif - HasRint = 1 + HasBlend = 1 }; }; template <> @@ -1309,73 +1297,14 @@ template <> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { return _mm_floor_pd(a); } -#else -template <> -EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) { - // Adds and subtracts signum(a) * 2^23 to force rounding. - const Packet4f limit = pset1(static_cast(1 << 23)); - const Packet4f abs_a = pabs(a); - Packet4f r = padd(abs_a, limit); - // Don't compile-away addition and subtraction. - EIGEN_OPTIMIZATION_BARRIER(r); - r = psub(r, limit); - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; -} template <> -EIGEN_STRONG_INLINE Packet2d print(const Packet2d& a) { - // Adds and subtracts signum(a) * 2^52 to force rounding. - const Packet2d limit = pset1(static_cast(1ull << 52)); - const Packet2d abs_a = pabs(a); - Packet2d r = padd(abs_a, limit); - // Don't compile-away addition and subtraction. 
- EIGEN_OPTIMIZATION_BARRIER(r); - r = psub(r, limit); - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; +EIGEN_STRONG_INLINE Packet4f ptrunc(const Packet4f& a) { + return _mm_round_ps(a, _MM_FROUND_TRUNC); } - template <> -EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) { - const Packet4f cst_1 = pset1(1.0f); - Packet4f tmp = print(a); - // If greater, subtract one. - Packet4f mask = _mm_cmpgt_ps(tmp, a); - mask = pand(mask, cst_1); - return psub(tmp, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { - const Packet2d cst_1 = pset1(1.0); - Packet2d tmp = print(a); - // If greater, subtract one. - Packet2d mask = _mm_cmpgt_pd(tmp, a); - mask = pand(mask, cst_1); - return psub(tmp, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { - const Packet4f cst_1 = pset1(1.0f); - Packet4f tmp = print(a); - // If smaller, add one. - Packet4f mask = _mm_cmplt_ps(tmp, a); - mask = pand(mask, cst_1); - return padd(tmp, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet2d pceil(const Packet2d& a) { - const Packet2d cst_1 = pset1(1.0); - Packet2d tmp = print(a); - // If smaller, add one. 
- Packet2d mask = _mm_cmplt_pd(tmp, a); - mask = pand(mask, cst_1); - return padd(tmp, mask); +EIGEN_STRONG_INLINE Packet2d ptrunc(const Packet2d& a) { + return _mm_round_pd(a, _MM_FROUND_TRUNC); } #endif diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h index 6a03de964..3f847a9ca 100644 --- a/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/Eigen/src/Core/arch/SVE/PacketMath.h @@ -353,7 +353,6 @@ struct packet_traits : default_packet_traits { HasReduxp = 0, // Not implemented in SVE HasDiv = 1, - HasFloor = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h index 8ac8f778c..b45681320 100644 --- a/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -195,9 +195,6 @@ struct packet_traits : default_packet_traits { HasRsqrt = 1, HasTanh = 1, HasErf = 1, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, HasNegate = 1, HasBlend = 1 }; @@ -225,9 +222,6 @@ struct packet_traits : default_packet_traits { HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, HasNegate = 1, HasBlend = 1 }; diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 8d95819b0..2b0c05ce4 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -882,7 +882,7 @@ template struct functor_traits> { enum { Cost = NumTraits::MulCost, - PacketAccess = packet_traits::HasFloor || NumTraits::IsInteger + PacketAccess = packet_traits::HasRound || NumTraits::IsInteger }; }; @@ -902,7 +902,7 @@ template struct functor_traits> { enum { Cost = NumTraits::MulCost, - PacketAccess = packet_traits::HasRint || NumTraits::IsInteger + PacketAccess = packet_traits::HasRound || NumTraits::IsInteger }; }; @@ -922,7 +922,27 @@ template struct functor_traits> { enum { Cost = NumTraits::MulCost, - PacketAccess = packet_traits::HasCeil || 
NumTraits::IsInteger + PacketAccess = packet_traits::HasRound || NumTraits::IsInteger + }; +}; + +/** \internal + * \brief Template functor to compute the truncation of a scalar + * \sa class CwiseUnaryOp, ArrayBase::trunc() + */ +template +struct scalar_trunc_op { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return numext::trunc(a); } + template + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { + return internal::ptrunc(a); + } +}; +template +struct functor_traits> { + enum { + Cost = NumTraits::MulCost, + PacketAccess = packet_traits::HasRound || NumTraits::IsInteger }; }; diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.inc b/Eigen/src/plugins/ArrayCwiseUnaryOps.inc index d03edc249..5e5d45b05 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.inc @@ -37,6 +37,7 @@ typedef CwiseUnaryOp, const Derived> RoundRetu typedef CwiseUnaryOp, const Derived> RintReturnType; typedef CwiseUnaryOp, const Derived> FloorReturnType; typedef CwiseUnaryOp, const Derived> CeilReturnType; +typedef CwiseUnaryOp, const Derived> TruncReturnType; typedef CwiseUnaryOp, const Derived> IsNaNReturnType; typedef CwiseUnaryOp, const Derived> IsInfReturnType; typedef CwiseUnaryOp, const Derived> IsFiniteReturnType; @@ -347,6 +348,15 @@ EIGEN_DEVICE_FUNC inline const FloorReturnType floor() const { return FloorRetur */ EIGEN_DEVICE_FUNC inline const CeilReturnType ceil() const { return CeilReturnType(derived()); } +/** \returns an expression of the coefficient-wise truncation of *this. 
+ * + * Example: \include Cwise_trunc.cpp + * Output: \verbinclude Cwise_trunc.out + * + * \sa Math functions, floor(), round() + */ +EIGEN_DEVICE_FUNC inline const TruncReturnType trunc() const { return TruncReturnType(derived()); } + template struct ShiftRightXpr { typedef CwiseUnaryOp, const Derived> Type; diff --git a/doc/snippets/Cwise_trunc.cpp b/doc/snippets/Cwise_trunc.cpp new file mode 100644 index 000000000..8fb29c854 --- /dev/null +++ b/doc/snippets/Cwise_trunc.cpp @@ -0,0 +1,3 @@ +ArrayXd v = ArrayXd::LinSpaced(7, -2, 2); +cout << v << endl << endl; +cout << trunc(v) << endl; diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp index 9fb104cb8..b5ad3c46f 100644 --- a/test/array_cwise.cpp +++ b/test/array_cwise.cpp @@ -40,6 +40,7 @@ template ::IsInteger, int> std::vector special_values() { const Scalar zero = Scalar(0); const Scalar eps = Eigen::NumTraits::epsilon(); + const Scalar one_half = Scalar(0.5); const Scalar one = Scalar(1); const Scalar two = Scalar(2); const Scalar three = Scalar(3); @@ -51,7 +52,7 @@ std::vector special_values() { const Scalar min = (std::numeric_limits::min)(); const Scalar max = (std::numeric_limits::max)(); const Scalar max_exp = (static_cast(int(Eigen::NumTraits::max_exponent())) * Scalar(EIGEN_LN2)) / eps; - return {zero, denorm_min, min, eps, sqrt_half, one, sqrt2, two, three, max_exp, max, inf, nan}; + return {zero, denorm_min, min, eps, sqrt_half, one_half, one, sqrt2, two, three, max_exp, max, inf, nan}; } template @@ -184,6 +185,11 @@ void unary_ops_test() { unary_op_test(UNARY_FUNCTOR_TEST_ARGS(asinh)); unary_op_test(UNARY_FUNCTOR_TEST_ARGS(acosh)); unary_op_test(UNARY_FUNCTOR_TEST_ARGS(atanh)); + unary_op_test(UNARY_FUNCTOR_TEST_ARGS(rint)); + unary_op_test(UNARY_FUNCTOR_TEST_ARGS(floor)); + unary_op_test(UNARY_FUNCTOR_TEST_ARGS(ceil)); + unary_op_test(UNARY_FUNCTOR_TEST_ARGS(round)); + unary_op_test(UNARY_FUNCTOR_TEST_ARGS(trunc)); /* FIXME: Enable when the behavior of rsqrt on denormals for half and double 
is fixed. unary_op_test("rsqrt", [](const auto& x) { return Eigen::rsqrt(x); }, @@ -791,6 +797,7 @@ void array_real(const ArrayType& m) { VERIFY_IS_APPROX(m1.rint(), rint(m1)); VERIFY_IS_APPROX(m1.floor(), floor(m1)); VERIFY_IS_APPROX(m1.ceil(), ceil(m1)); + VERIFY_IS_APPROX(m1.trunc(), trunc(m1)); VERIFY((m1.isNaN() == (Eigen::isnan)(m1)).all()); VERIFY((m1.isInf() == (Eigen::isinf)(m1)).all()); VERIFY((m1.isFinite() == (Eigen::isfinite)(m1)).all()); diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 894fb4126..208930dea 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -866,15 +866,16 @@ void packetmath_real() { CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan); CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::round, internal::pround); - CHECK_CWISE1_EXACT_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil); - CHECK_CWISE1_EXACT_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor); - CHECK_CWISE1_EXACT_IF(PacketTraits::HasRint, numext::rint, internal::print); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::ceil, internal::pceil); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::floor, internal::pfloor); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::rint, internal::print); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::trunc, internal::ptrunc); CHECK_CWISE1_IF(PacketTraits::HasSign, numext::sign, internal::psign); packetmath_boolean_mask_ops_real(); // Rounding edge cases. - if (PacketTraits::HasRound || PacketTraits::HasCeil || PacketTraits::HasFloor || PacketTraits::HasRint) { + if (PacketTraits::HasRound) { typedef typename internal::make_integer::type IntType; // Start with values that cannot fit inside an integer, work down to less than one. 
Scalar val = @@ -908,9 +909,10 @@ void packetmath_real() { for (size_t k = 0; k < values.size(); ++k) { data1[0] = values[k]; CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::round, internal::pround); - CHECK_CWISE1_EXACT_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil); - CHECK_CWISE1_EXACT_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor); - CHECK_CWISE1_EXACT_IF(PacketTraits::HasRint, numext::rint, internal::print); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::ceil, internal::pceil); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::floor, internal::pfloor); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::rint, internal::print); + CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::trunc, internal::ptrunc); } }