diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 5ee67a599..f4f6794a1 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -32,6 +32,7 @@ EIGEN_DOUBLE_PACKET_FUNCTION(cbrt, Packet4d) #ifdef EIGEN_VECTORIZE_AVX2 EIGEN_DOUBLE_PACKET_FUNCTION(sin, Packet4d) EIGEN_DOUBLE_PACKET_FUNCTION(cos, Packet4d) +EIGEN_DOUBLE_PACKET_FUNCTION(tan, Packet4d) #endif EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet4d) EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet4d) diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 318b37551..eafff3d8c 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -110,6 +110,7 @@ struct packet_traits : default_packet_traits { HasReciprocal = EIGEN_FAST_MATH, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasACos = 1, HasASin = 1, HasATan = 1, @@ -143,6 +144,7 @@ struct packet_traits : default_packet_traits { #ifdef EIGEN_VECTORIZE_AVX2 HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, #endif HasTanh = EIGEN_FAST_MATH, HasErf = 1, diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index ddc766bf6..c69ba159d 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -119,6 +119,7 @@ struct packet_traits : default_packet_traits { HasConj = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasACos = 1, HasASin = 1, HasATan = 1, @@ -154,6 +155,7 @@ struct packet_traits : default_packet_traits { HasCbrt = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, HasLog1p = 1, diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index c98f21767..acc204846 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -178,6 +178,7 @@ struct packet_traits : default_packet_traits { HasAbs = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasACos = 1, HasASin = 1, HasATan = 1, @@ -3098,6 +3099,7 @@ struct packet_traits : default_packet_traits { HasAbs = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasErfc = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 487127b79..13cdba759 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -773,6 +773,11 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_double(const Pac return pselect(zero_mask, cst_zero, pmax(pldexp(x, fx), _x)); } +// Enum for selecting which function to compute. SinCos is intended to compute +// pairs of Sin and Cos of the even entries in the packet, e.g. +// SinCos([a, *, b, *]) = [sin(a), cos(a), sin(b), cos(b)]. +enum class TrigFunction : uint8_t { Sin, Cos, Tan, SinCos }; + // The following code is inspired by the following stack-overflow answer: // https://stackoverflow.com/questions/30463616/payne-hanek-algorithm-implementation-in-c/30465751#30465751 // It has been largely optimized: @@ -829,7 +834,7 @@ inline float trig_reduce_huge(float xf, Eigen::numext::int32_t* quadrant) { return float(double(int64_t(p)) * pio2_62); } -template +template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS #if EIGEN_COMP_GNUC_STRICT __attribute__((optimize("-fno-unsafe-math-optimizations"))) @@ -859,7 +864,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS #if defined(EIGEN_VECTORIZE_FMA) // This version requires true FMA for high accuracy. // It provides a max error of 1ULP up to (with absolute_error < 5.9605e-08): - const float huge_th = ComputeSine ? 117435.992f : 71476.0625f; + constexpr float huge_th = (Func == TrigFunction::Sin) ? 117435.992f : 71476.0625f; x = pmadd(y, pset1(-1.57079601287841796875f), x); x = pmadd(y, pset1(-3.1391647326017846353352069854736328125e-07f), x); x = pmadd(y, pset1(-5.390302529957764765544681040410068817436695098876953125e-15f), x); @@ -870,7 +875,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS // The following set of coefficients maintain 1ULP up to 9.43 and 14.16 for sin and cos respectively. // and 2 ULP up to: - const float huge_th = ComputeSine ? 25966.f : 18838.f; + constexpr float huge_th = (Func == TrigFunction::Sin) ? 25966.f : 18838.f; x = pmadd(y, pset1(-1.5703125), x); // = 0xbfc90000 EIGEN_OPTIMIZATION_BARRIER(x) x = pmadd(y, pset1(-0.000483989715576171875), x); // = 0xb9fdc000 @@ -908,13 +913,6 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS y_int = ploadu(y_int2); } - // Compute the sign to apply to the polynomial. - // sin: sign = second_bit(y_int) xor signbit(_x) - // cos: sign = second_bit(y_int+1) - Packet sign_bit = ComputeSine ? pxor(_x, preinterpret(plogical_shift_left<30>(y_int))) - : preinterpret(plogical_shift_left<30>(padd(y_int, csti_1))); - sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit - // Get the polynomial selection mask from the second bit of y_int // We'll calculate both (sin and cos) polynomials and then select from the two. Packet poly_mask = preinterpret(pcmp_eq(pand(y_int, csti_1), pzero(y_int))); @@ -943,7 +941,15 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS y2 = pmadd(y2, x, x); // Select the correct result from the two polynomials. - if (ComputeBoth) { + // Compute the sign to apply to the polynomial. + // sin: sign = second_bit(y_int) xor signbit(_x) + // cos: sign = second_bit(y_int+1) + Packet sign_bit = (Func == TrigFunction::Sin) ? pxor(_x, preinterpret(plogical_shift_left<30>(y_int))) + : preinterpret(plogical_shift_left<30>(padd(y_int, csti_1))); + sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit + + if ((Func == TrigFunction::SinCos) || (Func == TrigFunction::Tan)) { + // TODO(rmlarsen): Add single polynomial for tan(x) instead of paying for sin+cos+div. Packet peven = peven_mask(x); Packet ysin = pselect(poly_mask, y2, y1); Packet ycos = pselect(poly_mask, y1, y2); @@ -951,23 +957,28 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet sign_bit_cos = preinterpret(plogical_shift_left<30>(padd(y_int, csti_1))); sign_bit_sin = pand(sign_bit_sin, cst_sign_mask); // clear all but left most bit sign_bit_cos = pand(sign_bit_cos, cst_sign_mask); // clear all but left most bit - y = pselect(peven, pxor(ysin, sign_bit_sin), pxor(ycos, sign_bit_cos)); + y = (Func == TrigFunction::SinCos) ? pselect(peven, pxor(ysin, sign_bit_sin), pxor(ycos, sign_bit_cos)) + : pdiv(pxor(ysin, sign_bit_sin), pxor(ycos, sign_bit_cos)); } else { - y = ComputeSine ? pselect(poly_mask, y2, y1) : pselect(poly_mask, y1, y2); + y = (Func == TrigFunction::Sin) ? pselect(poly_mask, y2, y1) : pselect(poly_mask, y1, y2); y = pxor(y, sign_bit); } - // Update the sign and filter huge inputs return y; } template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin_float(const Packet& x) { - return psincos_float(x); + return psincos_float(x); } template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos_float(const Packet& x) { - return psincos_float(x); + return psincos_float(x); +} + +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan_float(const Packet& x) { + return psincos_float(x); } // Trigonometric argument reduction for double for inputs smaller than 15. @@ -1007,7 +1018,7 @@ Packet trig_reduce_medium_double(const Packet& x, const Packet& q_high, const Pa return t; } -template +template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS #if EIGEN_COMP_GNUC_STRICT __attribute__((optimize("-fno-unsafe-math-optimizations"))) @@ -1094,19 +1105,26 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet sign_sin = pxor(x, preinterpret(plogical_shift_left<62>(q_int))); Packet sign_cos = preinterpret(plogical_shift_left<62>(padd(q_int, cst_one))); Packet sign_bit, sFinalRes; - if (ComputeBoth) { + if (Func == TrigFunction::Sin) { + sign_bit = sign_sin; + sFinalRes = pselect(poly_mask, ssin, scos); + } else if (Func == TrigFunction::Cos) { + sign_bit = sign_cos; + sFinalRes = pselect(poly_mask, scos, ssin); + } else if (Func == TrigFunction::Tan) { + // TODO(rmlarsen): Add single polynomial for tan(x) instead of paying for sin+cos+div. + sign_bit = pxor(sign_sin, sign_cos); + sFinalRes = pdiv(pselect(poly_mask, ssin, scos), pselect(poly_mask, scos, ssin)); + } else if (Func == TrigFunction::SinCos) { Packet peven = peven_mask(x); sign_bit = pselect((s), sign_sin, sign_cos); sFinalRes = pselect(pxor(peven, poly_mask), ssin, scos); - } else { - sign_bit = ComputeSine ? sign_sin : sign_cos; - sFinalRes = ComputeSine ? pselect(poly_mask, ssin, scos) : pselect(poly_mask, scos, ssin); } sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit sFinalRes = pxor(sFinalRes, sign_bit); // If the inputs values are higher than that a value that the argument reduction can currently address, compute them - // using std::sin and std::cos + // using the C++ standard library. // TODO Remove it when huge angle argument reduction is implemented if (EIGEN_PREDICT_FALSE(predux_any(pcmp_le(pset1(huge_th), x_abs)))) { const int PacketSize = unpacket_traits::size; @@ -1117,10 +1135,15 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS for (int k = 0; k < PacketSize; ++k) { double val = x_cpy[k]; if (std::abs(val) > huge_th && (numext::isfinite)(val)) { - if (ComputeBoth) + if (Func == TrigFunction::Sin) { + sincos_vals[k] = std::sin(val); + } else if (Func == TrigFunction::Cos) { + sincos_vals[k] = std::cos(val); + } else if (Func == TrigFunction::Tan) { + sincos_vals[k] = std::tan(val); + } else if (Func == TrigFunction::SinCos) { sincos_vals[k] = k % 2 == 0 ? std::sin(val) : std::cos(val); - else - sincos_vals[k] = ComputeSine ? std::sin(val) : std::cos(val); + } } } sFinalRes = ploadu(sincos_vals); @@ -1130,26 +1153,31 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin_double(const Packet& x) { - return psincos_double(x); + return psincos_double(x); } template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos_double(const Packet& x) { - return psincos_double(x); + return psincos_double(x); } -template +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan_double(const Packet& x) { + return psincos_double(x); +} + +template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS std::enable_if_t::type, float>::value, Packet> psincos_selector(const Packet& x) { - return psincos_float(x); + return psincos_float(x); } -template +template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS std::enable_if_t::type, double>::value, Packet> psincos_selector(const Packet& x) { - return psincos_double(x); + return psincos_double(x); } // Generic implementation of acos(x). @@ -1599,7 +1627,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Pa // cis(y): RealPacket y = pand(odd_mask, a.v); y = por(y, pcplxflip(Packet(y)).v); - RealPacket cisy = psincos_selector(y); + RealPacket cisy = psincos_selector(y); cisy = pcplxflip(Packet(cisy)).v; // cos(y) + i * sin(y) const RealPacket cst_pos_inf = pset1(NumTraits::infinity()); diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h index 69a551757..942ae129c 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -110,6 +110,10 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin_float(const Pack template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos_float(const Packet& x); +/** \internal \returns tan(x) for single precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan_float(const Packet& x); + /** \internal \returns sin(x) for double precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin_double(const Packet& x); @@ -118,6 +122,10 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin_double(const Pac template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos_double(const Packet& x); +/** \internal \returns tan(x) for double precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan_double(const Packet& x); + /** \internal \returns asin(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Packet& x); @@ -200,6 +208,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_round(const Packet& a); #define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(sin, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(cos, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(tan, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(tanh, PACKET) \ @@ -216,6 +225,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_round(const Packet& a); #define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(sin, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(cos, PACKET) \ + EIGEN_DOUBLE_PACKET_FUNCTION(tan, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(log, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(log2, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(exp, PACKET) \ diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 6f93b1513..bf50697f6 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -197,6 +197,7 @@ struct packet_traits : default_packet_traits { HasDiv = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasACos = 1, HasASin = 1, HasATan = 1, @@ -5017,6 +5018,7 @@ struct packet_traits : default_packet_traits { #endif HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasSqrt = 1, HasRsqrt = 1, HasCbrt = 1, diff --git a/Eigen/src/Core/arch/RVV10/PacketMath.h b/Eigen/src/Core/arch/RVV10/PacketMath.h index 54db62634..e0e0be4a8 100644 --- a/Eigen/src/Core/arch/RVV10/PacketMath.h +++ b/Eigen/src/Core/arch/RVV10/PacketMath.h @@ -507,6 +507,7 @@ struct packet_traits : default_packet_traits { HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, HasSqrt = 1, diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 1ea23b0f5..7d53fa204 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -183,6 +183,7 @@ struct packet_traits : default_packet_traits { HasReciprocal = EIGEN_FAST_MATH, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasACos = 1, HasASin = 1, HasATan = 1, @@ -216,6 +217,7 @@ struct packet_traits : default_packet_traits { HasDiv = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasErfc = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/SVE/MathFunctions.h b/Eigen/src/Core/arch/SVE/MathFunctions.h index 8c8ed84cf..59674331e 100644 --- a/Eigen/src/Core/arch/SVE/MathFunctions.h +++ b/Eigen/src/Core/arch/SVE/MathFunctions.h @@ -36,6 +36,11 @@ EIGEN_STRONG_INLINE PacketXf pcos(const PacketXf& x) { return pcos_float(x); } +template <> +EIGEN_STRONG_INLINE PacketXf ptan(const PacketXf& x) { + return ptan_float(x); +} + // Hyperbolic Tangent function. template <> EIGEN_STRONG_INLINE PacketXf ptanh(const PacketXf& x) { diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h index 28fc62b83..39b29fa26 100644 --- a/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/Eigen/src/Core/arch/SVE/PacketMath.h @@ -353,6 +353,7 @@ struct packet_traits : default_packet_traits { HasCmp = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTan = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, HasPow = 1, diff --git a/Eigen/src/Core/arch/clang/PacketMath.h b/Eigen/src/Core/arch/clang/PacketMath.h index e142264b8..19e5e8f24 100644 --- a/Eigen/src/Core/arch/clang/PacketMath.h +++ b/Eigen/src/Core/arch/clang/PacketMath.h @@ -56,6 +56,7 @@ struct generic_float_packet_traits : default_packet_traits { HasReciprocal = 1, HasSin = 1, HasCos = 1, + HasTan = 1, HasACos = 1, HasASin = 1, HasATan = 1,