Vectorize asinh and acosh for float and double

libeigen/eigen!2376 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
2026-04-10 11:34:33 +08:00 · 2026-04-01 21:46:36 -07:00
parent 9513d3878e
commit d31a73437f
7 changed files with 77 additions and 67 deletions
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -119,6 +119,8 @@ struct packet_traits<float> : default_packet_traits {
    HasATanh = 1,
    HasSinh = 1,
    HasCosh = 1,
+    HasASinh = 1,
+    HasACosh = 1,
    HasLog = 1,
    HasLog10 = 1,
    HasExp = 1,
@@ -153,6 +155,8 @@ struct packet_traits<double> : default_packet_traits {
 #endif
    HasSinh = 1,
    HasCosh = 1,
+    HasASinh = 1,
+    HasACosh = 1,
    HasTanh = EIGEN_FAST_MATH,
    HasErf = 1,
    HasErfc = 1,
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -174,6 +174,8 @@ struct packet_traits<float> : default_packet_traits {
    HasATanh = 1,
    HasSinh = 1,
    HasCosh = 1,
+    HasASinh = 1,
+    HasACosh = 1,
    HasSqrt = 1,
    HasRsqrt = 1,
    HasCbrt = 1,
@@ -209,6 +211,8 @@ struct packet_traits<double> : default_packet_traits {
    HasTan = EIGEN_FAST_MATH,
    HasSinh = 1,
    HasCosh = 1,
+    HasASinh = 1,
+    HasACosh = 1,
    HasLog = 1,
    HasLog10 = 1,
    HasExp = 1,
--- a/Eigen/src/Core/arch/Default/GenericPacketMathTrig.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathTrig.h
@@ -977,9 +977,9 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh_double(const Pa
 //----------------------------------------------------------------------

 /** \internal \returns the inverse hyperbolic sine of \a x (coeff-wise).
-    For small |x|: asinh(x) = sign(x) * log1p(|x| + x^2/(1 + sqrt(1 + x^2)))
-    For large |x|: asinh(x) = sign(x) * (log(|x|) + ln(2))
-    Otherwise:     asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1))
+    Uses a single log1p call by selecting the argument before the transcendental:
+    For moderate |x|: log1p(|x| + x^2 / (1 + sqrt(1 + x^2)))
+    For large |x|:    log1p(|x| - 1) + ln2  (avoids x^2 overflow)
 */
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasinh_float(const Packet& x) {
@@ -988,21 +988,21 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasinh_float(const Pa
  const Packet x_sign = pand(x, sign_mask);
  const Packet one = pset1<Packet>(1.0f);

-  // For |x| < 0.5, use log1p formulation to avoid cancellation:
-  // asinh(x) = log1p(|x| + x^2 / (1 + sqrt(1 + x^2)))
-  const Packet x2 = pmul(abs_x, abs_x);
-  Packet p_small = generic_log1p(padd(abs_x, pdiv(x2, padd(one, psqrt(padd(one, x2))))));
-
-  // For 0.5 <= |x| < 1e10, use log(|x| + sqrt(x^2 + 1)).
-  Packet p_med = plog(padd(abs_x, psqrt(padd(x2, one))));
-
-  // For |x| >= 1e10, use log(2*|x|) = log(|x|) + ln(2) to avoid x^2 overflow.
-  const Packet ln2 = pset1<Packet>(0.6931471805599453f);
-  Packet p_large = padd(plog(abs_x), ln2);
-
-  const Packet small_mask = pcmp_lt(abs_x, pset1<Packet>(0.5f));
+  // For |x| >= 1e10, use log(2|x|) = log1p(|x| - 1) + ln2 to avoid x^2 overflow.
  const Packet large_mask = pcmp_lt(pset1<Packet>(1e10f), abs_x);
-  Packet result = pselect(large_mask, p_large, pselect(small_mask, p_small, p_med));
+  // Guard x^2 against overflow in the large case.
+  const Packet x2 = pmul(abs_x, pselect(large_mask, pzero(abs_x), abs_x));
+  // For |x| < 1e10: log1p(|x| + x^2 / (1 + sqrt(1 + x^2))).
+  // Algebraically equivalent to log(|x| + sqrt(x^2 + 1))
+  // but avoids cancellation for small |x|.
+  Packet normal_arg = padd(abs_x, pdiv(x2, padd(one, psqrt(padd(one, x2)))));
+  // For |x| >= 1e10: log1p(|x| - 1), then add ln2 after.
+  Packet large_arg = psub(abs_x, one);
+  // Select argument, then call log1p once.
+  Packet result = generic_log1p(pselect(large_mask, large_arg, normal_arg));
+  // Add ln2 for the large path: log(2|x|) = log(|x|) + ln2 = log1p(|x|-1) + ln2.
+  const Packet ln2 = pset1<Packet>(0.6931471805599453f);
+  result = pselect(large_mask, padd(result, ln2), result);
  return por(x_sign, result);
 }

@@ -1013,49 +1013,37 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasinh_double(const P
  const Packet x_sign = pand(x, sign_mask);
  const Packet one = pset1<Packet>(1.0);

-  const Packet x2 = pmul(abs_x, abs_x);
-  Packet p_small = generic_log1p(padd(abs_x, pdiv(x2, padd(one, psqrt(padd(one, x2))))));
-
-  Packet p_med = plog(padd(abs_x, psqrt(padd(x2, one))));
-
-  const Packet ln2 = pset1<Packet>(0.6931471805599453);
-  Packet p_large = padd(plog(abs_x), ln2);
-
-  const Packet small_mask = pcmp_lt(abs_x, pset1<Packet>(0.5));
  const Packet large_mask = pcmp_lt(pset1<Packet>(1e150), abs_x);
-  Packet result = pselect(large_mask, p_large, pselect(small_mask, p_small, p_med));
+  const Packet x2 = pmul(abs_x, pselect(large_mask, pzero(abs_x), abs_x));
+  Packet normal_arg = padd(abs_x, pdiv(x2, padd(one, psqrt(padd(one, x2)))));
+  Packet large_arg = psub(abs_x, one);
+  Packet result = generic_log1p(pselect(large_mask, large_arg, normal_arg));
+  const Packet ln2 = pset1<Packet>(0.6931471805599453);
+  result = pselect(large_mask, padd(result, ln2), result);
  return por(x_sign, result);
 }

 /** \internal \returns the inverse hyperbolic cosine of \a x (coeff-wise).
-    Uses acosh(x) = log(x + sqrt(x^2 - 1)) for x >= 1.
+    Uses a single log1p call by selecting the argument before the transcendental:
+    For moderate x: log1p(t + sqrt(t*(t+2))) where t = x - 1
+    For huge x:     log1p(t) + ln2  (avoids t*(t+2) overflow)
    Returns NaN for x < 1.
 */
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacosh_float(const Packet& x) {
  const Packet one = pset1<Packet>(1.0f);
-  // For x near 1, use log1p to avoid cancellation:
-  // acosh(x) = log(x + sqrt(x^2-1)) = log(x + sqrt((x-1)(x+1)))
-  // For x close to 1, let t = x-1, then:
-  // acosh(x) = log1p(t + sqrt(t*(t+2)))
-  const Packet t = psub(x, one);
-  const Packet small_mask = pcmp_lt(t, pset1<Packet>(0.5f));
-
-  // Small path: acosh(x) = log1p(t + sqrt(t*(t+2)))
  const Packet two = pset1<Packet>(2.0f);
-  Packet p_small = generic_log1p(padd(t, psqrt(pmul(t, padd(t, two)))));
-
-  // Large path: acosh(x) = log(x + sqrt(x^2-1))
-  // For very large x, use log(2*x) to avoid overflow in x^2.
-  const Packet large_threshold = pset1<Packet>(1e10f);
-  const Packet huge_mask = pcmp_lt(large_threshold, x);
-  const Packet x2_safe = pselect(huge_mask, one, pmul(x, x));
-  Packet p_large = plog(padd(x, psqrt(psub(x2_safe, one))));
-  const Packet log2 = pset1<Packet>(0.6931471805599453f);
-  p_large = pselect(huge_mask, padd(plog(x), log2), p_large);
-
-  Packet result = pselect(small_mask, p_small, p_large);
-
+  const Packet t = psub(x, one);
+  const Packet huge_mask = pcmp_lt(pset1<Packet>(1e10f), x);
+  // Guard t*(t+2) against overflow in the huge case.
+  const Packet t_tp2 = pmul(pselect(huge_mask, pzero(t), t), padd(t, two));
+  Packet normal_arg = padd(t, psqrt(t_tp2));
+  // For huge x: acosh(x) = log(2x) = log1p(x - 1) + ln2.
+  Packet huge_arg = t;
+  // Select argument, then call log1p once.
+  Packet result = generic_log1p(pselect(huge_mask, huge_arg, normal_arg));
+  const Packet ln2 = pset1<Packet>(0.6931471805599453f);
+  result = pselect(huge_mask, padd(result, ln2), result);
  // Return NaN for x < 1.
  const Packet invalid_mask = pcmp_lt(x, one);
  return por(invalid_mask, result);
@@ -1064,23 +1052,15 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacosh_float(const Pa
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacosh_double(const Packet& x) {
  const Packet one = pset1<Packet>(1.0);
-  const Packet t = psub(x, one);
-  const Packet small_mask = pcmp_lt(t, pset1<Packet>(0.5));
-
-  // Small path: acosh(x) = log1p(t + sqrt(t*(t+2)))
  const Packet two = pset1<Packet>(2.0);
-  Packet p_small = generic_log1p(padd(t, psqrt(pmul(t, padd(t, two)))));
-
-  // Large path: acosh(x) = log(x + sqrt(x^2-1))
-  const Packet large_threshold = pset1<Packet>(1e150);
-  const Packet huge_mask = pcmp_lt(large_threshold, x);
-  const Packet x2_safe = pselect(huge_mask, one, pmul(x, x));
-  Packet p_large = plog(padd(x, psqrt(psub(x2_safe, one))));
-  const Packet log2 = pset1<Packet>(0.6931471805599453);
-  p_large = pselect(huge_mask, padd(plog(x), log2), p_large);
-
-  Packet result = pselect(small_mask, p_small, p_large);
-
+  const Packet t = psub(x, one);
+  const Packet huge_mask = pcmp_lt(pset1<Packet>(1e150), x);
+  const Packet t_tp2 = pmul(pselect(huge_mask, pzero(t), t), padd(t, two));
+  Packet normal_arg = padd(t, psqrt(t_tp2));
+  Packet huge_arg = t;
+  Packet result = generic_log1p(pselect(huge_mask, huge_arg, normal_arg));
+  const Packet ln2 = pset1<Packet>(0.6931471805599453);
+  result = pselect(huge_mask, padd(result, ln2), result);
  const Packet invalid_mask = pcmp_lt(x, one);
  return por(invalid_mask, result);
 }
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -204,6 +204,8 @@ struct packet_traits<float> : default_packet_traits {
    HasATanh = 1,
    HasSinh = 1,
    HasCosh = 1,
+    HasASinh = 1,
+    HasACosh = 1,
    HasLog = 1,
    HasLog10 = 1,
    HasExp = 1,
@@ -5051,6 +5053,8 @@ struct packet_traits<double> : default_packet_traits {
    HasATanh = 1,
    HasSinh = 1,
    HasCosh = 1,
+    HasASinh = 1,
+    HasACosh = 1,
 #endif
    HasSin = EIGEN_FAST_MATH,
    HasCos = EIGEN_FAST_MATH,
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -192,6 +192,8 @@ struct packet_traits<float> : default_packet_traits {
    HasATanh = 1,
    HasSinh = 1,
    HasCosh = 1,
+    HasASinh = 1,
+    HasACosh = 1,
    HasLog = 1,
    HasLog1p = 1,
    HasLog10 = 1,
@@ -225,6 +227,8 @@ struct packet_traits<double> : default_packet_traits {
    HasTan = EIGEN_FAST_MATH,
    HasSinh = 1,
    HasCosh = 1,
+    HasASinh = 1,
+    HasACosh = 1,
    HasTanh = EIGEN_FAST_MATH,
    HasErf = EIGEN_FAST_MATH,
    HasErfc = EIGEN_FAST_MATH,
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -765,11 +765,15 @@ struct functor_traits<scalar_sinh_op<Scalar>> {
 template <typename Scalar>
 struct scalar_asinh_op {
  EIGEN_DEVICE_FUNC constexpr inline Scalar operator()(const Scalar& a) const { return numext::asinh(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const {
+    return internal::pasinh(a);
+  }
 };

 template <typename Scalar>
 struct functor_traits<scalar_asinh_op<Scalar>> {
-  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
+  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasASinh };
 };

 /** \internal
@@ -796,11 +800,15 @@ struct functor_traits<scalar_cosh_op<Scalar>> {
 template <typename Scalar>
 struct scalar_acosh_op {
  EIGEN_DEVICE_FUNC constexpr inline Scalar operator()(const Scalar& a) const { return numext::acosh(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const {
+    return internal::pacosh(a);
+  }
 };

 template <typename Scalar>
 struct functor_traits<scalar_acosh_op<Scalar>> {
-  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
+  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasACosh };
 };

 /** \internal