diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index c093ce96c..bd35d7d20 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -119,6 +119,8 @@ struct packet_traits : default_packet_traits { HasATanh = 1, HasSinh = 1, HasCosh = 1, + HasASinh = 1, + HasACosh = 1, HasLog = 1, HasLog10 = 1, HasExp = 1, @@ -153,6 +155,8 @@ struct packet_traits : default_packet_traits { #endif HasSinh = 1, HasCosh = 1, + HasASinh = 1, + HasACosh = 1, HasTanh = EIGEN_FAST_MATH, HasErf = 1, HasErfc = 1, diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index f2ca0bbc0..7e9979842 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -174,6 +174,8 @@ struct packet_traits : default_packet_traits { HasATanh = 1, HasSinh = 1, HasCosh = 1, + HasASinh = 1, + HasACosh = 1, HasSqrt = 1, HasRsqrt = 1, HasCbrt = 1, @@ -209,6 +211,8 @@ struct packet_traits : default_packet_traits { HasTan = EIGEN_FAST_MATH, HasSinh = 1, HasCosh = 1, + HasASinh = 1, + HasACosh = 1, HasLog = 1, HasLog10 = 1, HasExp = 1, diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathTrig.h b/Eigen/src/Core/arch/Default/GenericPacketMathTrig.h index e962b3742..baa0f9ed5 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathTrig.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathTrig.h @@ -977,9 +977,9 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh_double(const Pa //---------------------------------------------------------------------- /** \internal \returns the inverse hyperbolic sine of \a x (coeff-wise). - For small |x|: asinh(x) = sign(x) * log1p(|x| + x^2/(1 + sqrt(1 + x^2))) - For large |x|: asinh(x) = sign(x) * (log(|x|) + ln(2)) - Otherwise: asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1)) + Uses a single log1p call by selecting the argument before the transcendental: + For moderate |x|: log1p(|x| + x^2 / (1 + sqrt(1 + x^2))) + For large |x|: log1p(|x| - 1) + ln2 (avoids x^2 overflow) */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasinh_float(const Packet& x) { @@ -988,21 +988,21 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasinh_float(const Pa const Packet x_sign = pand(x, sign_mask); const Packet one = pset1(1.0f); - // For |x| < 0.5, use log1p formulation to avoid cancellation: - // asinh(x) = log1p(|x| + x^2 / (1 + sqrt(1 + x^2))) - const Packet x2 = pmul(abs_x, abs_x); - Packet p_small = generic_log1p(padd(abs_x, pdiv(x2, padd(one, psqrt(padd(one, x2)))))); - - // For 0.5 <= |x| < 1e10, use log(|x| + sqrt(x^2 + 1)). - Packet p_med = plog(padd(abs_x, psqrt(padd(x2, one)))); - - // For |x| >= 1e10, use log(2*|x|) = log(|x|) + ln(2) to avoid x^2 overflow. - const Packet ln2 = pset1(0.6931471805599453f); - Packet p_large = padd(plog(abs_x), ln2); - - const Packet small_mask = pcmp_lt(abs_x, pset1(0.5f)); + // For |x| >= 1e10, use log(2|x|) = log1p(|x| - 1) + ln2 to avoid x^2 overflow. const Packet large_mask = pcmp_lt(pset1(1e10f), abs_x); - Packet result = pselect(large_mask, p_large, pselect(small_mask, p_small, p_med)); + // Guard x^2 against overflow in the large case. + const Packet x2 = pmul(abs_x, pselect(large_mask, pzero(abs_x), abs_x)); + // For |x| < 1e10: log1p(|x| + x^2 / (1 + sqrt(1 + x^2))). + // Algebraically equivalent to log(|x| + sqrt(x^2 + 1)) + // but avoids cancellation for small |x|. + Packet normal_arg = padd(abs_x, pdiv(x2, padd(one, psqrt(padd(one, x2))))); + // For |x| >= 1e10: log1p(|x| - 1), then add ln2 after. + Packet large_arg = psub(abs_x, one); + // Select argument, then call log1p once. + Packet result = generic_log1p(pselect(large_mask, large_arg, normal_arg)); + // Add ln2 for the large path: log(2|x|) = log(|x|) + ln2 = log1p(|x|-1) + ln2. + const Packet ln2 = pset1(0.6931471805599453f); + result = pselect(large_mask, padd(result, ln2), result); return por(x_sign, result); } @@ -1013,49 +1013,37 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasinh_double(const P const Packet x_sign = pand(x, sign_mask); const Packet one = pset1(1.0); - const Packet x2 = pmul(abs_x, abs_x); - Packet p_small = generic_log1p(padd(abs_x, pdiv(x2, padd(one, psqrt(padd(one, x2)))))); - - Packet p_med = plog(padd(abs_x, psqrt(padd(x2, one)))); - - const Packet ln2 = pset1(0.6931471805599453); - Packet p_large = padd(plog(abs_x), ln2); - - const Packet small_mask = pcmp_lt(abs_x, pset1(0.5)); const Packet large_mask = pcmp_lt(pset1(1e150), abs_x); - Packet result = pselect(large_mask, p_large, pselect(small_mask, p_small, p_med)); + const Packet x2 = pmul(abs_x, pselect(large_mask, pzero(abs_x), abs_x)); + Packet normal_arg = padd(abs_x, pdiv(x2, padd(one, psqrt(padd(one, x2))))); + Packet large_arg = psub(abs_x, one); + Packet result = generic_log1p(pselect(large_mask, large_arg, normal_arg)); + const Packet ln2 = pset1(0.6931471805599453); + result = pselect(large_mask, padd(result, ln2), result); return por(x_sign, result); } /** \internal \returns the inverse hyperbolic cosine of \a x (coeff-wise). - Uses acosh(x) = log(x + sqrt(x^2 - 1)) for x >= 1. + Uses a single log1p call by selecting the argument before the transcendental: + For moderate x: log1p(t + sqrt(t*(t+2))) where t = x - 1 + For huge x: log1p(t) + ln2 (avoids t*(t+2) overflow) Returns NaN for x < 1. */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacosh_float(const Packet& x) { const Packet one = pset1(1.0f); - // For x near 1, use log1p to avoid cancellation: - // acosh(x) = log(x + sqrt(x^2-1)) = log(x + sqrt((x-1)(x+1))) - // For x close to 1, let t = x-1, then: - // acosh(x) = log1p(t + sqrt(t*(t+2))) - const Packet t = psub(x, one); - const Packet small_mask = pcmp_lt(t, pset1(0.5f)); - - // Small path: acosh(x) = log1p(t + sqrt(t*(t+2))) const Packet two = pset1(2.0f); - Packet p_small = generic_log1p(padd(t, psqrt(pmul(t, padd(t, two))))); - - // Large path: acosh(x) = log(x + sqrt(x^2-1)) - // For very large x, use log(2*x) to avoid overflow in x^2. - const Packet large_threshold = pset1(1e10f); - const Packet huge_mask = pcmp_lt(large_threshold, x); - const Packet x2_safe = pselect(huge_mask, one, pmul(x, x)); - Packet p_large = plog(padd(x, psqrt(psub(x2_safe, one)))); - const Packet log2 = pset1(0.6931471805599453f); - p_large = pselect(huge_mask, padd(plog(x), log2), p_large); - - Packet result = pselect(small_mask, p_small, p_large); - + const Packet t = psub(x, one); + const Packet huge_mask = pcmp_lt(pset1(1e10f), x); + // Guard t*(t+2) against overflow in the huge case. + const Packet t_tp2 = pmul(pselect(huge_mask, pzero(t), t), padd(t, two)); + Packet normal_arg = padd(t, psqrt(t_tp2)); + // For huge x: acosh(x) = log(2x) = log1p(x - 1) + ln2. + Packet huge_arg = t; + // Select argument, then call log1p once. + Packet result = generic_log1p(pselect(huge_mask, huge_arg, normal_arg)); + const Packet ln2 = pset1(0.6931471805599453f); + result = pselect(huge_mask, padd(result, ln2), result); // Return NaN for x < 1. const Packet invalid_mask = pcmp_lt(x, one); return por(invalid_mask, result); @@ -1064,23 +1052,15 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacosh_float(const Pa template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacosh_double(const Packet& x) { const Packet one = pset1(1.0); - const Packet t = psub(x, one); - const Packet small_mask = pcmp_lt(t, pset1(0.5)); - - // Small path: acosh(x) = log1p(t + sqrt(t*(t+2))) const Packet two = pset1(2.0); - Packet p_small = generic_log1p(padd(t, psqrt(pmul(t, padd(t, two))))); - - // Large path: acosh(x) = log(x + sqrt(x^2-1)) - const Packet large_threshold = pset1(1e150); - const Packet huge_mask = pcmp_lt(large_threshold, x); - const Packet x2_safe = pselect(huge_mask, one, pmul(x, x)); - Packet p_large = plog(padd(x, psqrt(psub(x2_safe, one)))); - const Packet log2 = pset1(0.6931471805599453); - p_large = pselect(huge_mask, padd(plog(x), log2), p_large); - - Packet result = pselect(small_mask, p_small, p_large); - + const Packet t = psub(x, one); + const Packet huge_mask = pcmp_lt(pset1(1e150), x); + const Packet t_tp2 = pmul(pselect(huge_mask, pzero(t), t), padd(t, two)); + Packet normal_arg = padd(t, psqrt(t_tp2)); + Packet huge_arg = t; + Packet result = generic_log1p(pselect(huge_mask, huge_arg, normal_arg)); + const Packet ln2 = pset1(0.6931471805599453); + result = pselect(huge_mask, padd(result, ln2), result); const Packet invalid_mask = pcmp_lt(x, one); return por(invalid_mask, result); } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index d7818398a..fb222b846 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -204,6 +204,8 @@ struct packet_traits : default_packet_traits { HasATanh = 1, HasSinh = 1, HasCosh = 1, + HasASinh = 1, + HasACosh = 1, HasLog = 1, HasLog10 = 1, HasExp = 1, @@ -5051,6 +5053,8 @@ struct packet_traits : default_packet_traits { HasATanh = 1, HasSinh = 1, HasCosh = 1, + HasASinh = 1, + HasACosh = 1, #endif HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index e83240e4f..695a173c1 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -192,6 +192,8 @@ struct packet_traits : default_packet_traits { HasATanh = 1, HasSinh = 1, HasCosh = 1, + HasASinh = 1, + HasACosh = 1, HasLog = 1, HasLog1p = 1, HasLog10 = 1, @@ -225,6 +227,8 @@ struct packet_traits : default_packet_traits { HasTan = EIGEN_FAST_MATH, HasSinh = 1, HasCosh = 1, + HasASinh = 1, + HasACosh = 1, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasErfc = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 99cda3d1e..b92607d29 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -765,11 +765,15 @@ struct functor_traits> { template struct scalar_asinh_op { EIGEN_DEVICE_FUNC constexpr inline Scalar operator()(const Scalar& a) const { return numext::asinh(a); } + template + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { + return internal::pasinh(a); + } }; template struct functor_traits> { - enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; + enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasASinh }; }; /** \internal @@ -796,11 +800,15 @@ struct functor_traits> { template struct scalar_acosh_op { EIGEN_DEVICE_FUNC constexpr inline Scalar operator()(const Scalar& a) const { return numext::acosh(a); } + template + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { + return internal::pacosh(a); + } }; template struct functor_traits> { - enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; + enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasACosh }; }; /** \internal diff --git a/benchmarks/Core/bench_cwise_math.cpp b/benchmarks/Core/bench_cwise_math.cpp index f24b45c2d..3ecb77b37 100644 --- a/benchmarks/Core/bench_cwise_math.cpp +++ b/benchmarks/Core/bench_cwise_math.cpp @@ -52,6 +52,8 @@ BENCH_CWISE_UNARY(Atan, a.atan(), -10, 10) BENCH_CWISE_UNARY(Sinh, a.sinh(), -5, 5) BENCH_CWISE_UNARY(Cosh, a.cosh(), -5, 5) BENCH_CWISE_UNARY(Tanh, a.tanh(), -5, 5) +BENCH_CWISE_UNARY(Asinh, a.asinh(), -5, 5) +BENCH_CWISE_UNARY(Acosh, a.acosh(), 1.01, 10) BENCH_CWISE_UNARY(Atanh, a.atanh(), -0.99, 0.99) BENCH_CWISE_UNARY(Log10, a.log10(), 0.01, 100) BENCH_CWISE_UNARY(Erf, Eigen::erf(a), -4, 4) @@ -155,6 +157,8 @@ BENCHMARK(BM_Atan) CWISE_SIZES ->Name("Atan_float"); BENCHMARK(BM_Sinh) CWISE_SIZES ->Name("Sinh_float"); BENCHMARK(BM_Cosh) CWISE_SIZES ->Name("Cosh_float"); BENCHMARK(BM_Tanh) CWISE_SIZES ->Name("Tanh_float"); +BENCHMARK(BM_Asinh) CWISE_SIZES ->Name("Asinh_float"); +BENCHMARK(BM_Acosh) CWISE_SIZES ->Name("Acosh_float"); BENCHMARK(BM_Atanh) CWISE_SIZES ->Name("Atanh_float"); BENCHMARK(BM_Log10) CWISE_SIZES ->Name("Log10_float"); BENCHMARK(BM_Erf) CWISE_SIZES ->Name("Erf_float"); @@ -188,6 +192,8 @@ BENCHMARK(BM_Atan) CWISE_SIZES ->Name("Atan_double"); BENCHMARK(BM_Sinh) CWISE_SIZES ->Name("Sinh_double"); BENCHMARK(BM_Cosh) CWISE_SIZES ->Name("Cosh_double"); BENCHMARK(BM_Tanh) CWISE_SIZES ->Name("Tanh_double"); +BENCHMARK(BM_Asinh) CWISE_SIZES ->Name("Asinh_double"); +BENCHMARK(BM_Acosh) CWISE_SIZES ->Name("Acosh_double"); BENCHMARK(BM_Atanh) CWISE_SIZES ->Name("Atanh_double"); BENCHMARK(BM_Log10) CWISE_SIZES ->Name("Log10_double"); BENCHMARK(BM_Erf) CWISE_SIZES ->Name("Erf_double");