Fix AVX double-precision trig and complex exp without AVX2

libeigen/eigen!2147

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
Rasmus Munk Larsen
2026-02-17 19:48:16 -08:00
parent 50d6d92a70
commit 740cac97b4
2 changed files with 26 additions and 0 deletions

View File

@@ -454,7 +454,15 @@ EIGEN_STRONG_INLINE Packet4cf plog<Packet4cf>(const Packet4cf& a) {
// Exponential of a Packet2cd.
// With AVX2 the generic pexp_complex kernel is used directly. On AVX-only
// targets the packet is instead split into its two 128-bit lanes, each lane is
// evaluated through the Packet1cd overload of pexp, and the two results are
// stitched back together into one 256-bit register.
template <>
EIGEN_STRONG_INLINE Packet2cd pexp<Packet2cd>(const Packet2cd& a) {
#ifdef EIGEN_VECTORIZE_AVX2
  return pexp_complex<Packet2cd>(a);
#else
  // pexp_complex<Packet2cd> depends on psincos_double<Packet4d>, which in turn
  // needs 256-bit integer operations (Packet4l). Those do not exist without
  // AVX2, so fall back to the SSE-width implementation, one lane at a time.
  const Packet1cd lower_in(_mm256_castpd256_pd128(a.v));
  const Packet1cd upper_in(_mm256_extractf128_pd(a.v, 1));

  const Packet1cd lower_out = pexp(lower_in);
  const Packet1cd upper_out = pexp(upper_in);

  // Widen the low result to 256 bits, then place the high result in lane 1.
  const __m256d widened = _mm256_castpd128_pd256(lower_out.v);
  return Packet2cd(_mm256_insertf128_pd(widened, upper_out.v, 1));
#endif
}
template <>

View File

@@ -33,6 +33,24 @@ EIGEN_DOUBLE_PACKET_FUNCTION(cbrt, Packet4d)
EIGEN_DOUBLE_PACKET_FUNCTION(sin, Packet4d)
EIGEN_DOUBLE_PACKET_FUNCTION(cos, Packet4d)
EIGEN_DOUBLE_PACKET_FUNCTION(tan, Packet4d)
#else
// Without AVX2, psincos_double<Packet4d> requires 256-bit integer operations (Packet4l)
// that are not available. Process as two Packet2d halves using the SSE implementation.
// Sine of a Packet4d for AVX-only builds: evaluate each 128-bit half with the
// two-lane psin overload and reassemble the 256-bit result.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d psin<Packet4d>(const Packet4d& x) {
  const Packet2d lower_half = _mm256_castpd256_pd128(x);
  const Packet2d upper_half = _mm256_extractf128_pd(x, 1);

  const Packet2d lower_sin = psin(lower_half);
  const Packet2d upper_sin = psin(upper_half);

  // Widen the low result to 256 bits, then drop the high result into lane 1.
  const Packet4d widened = _mm256_castpd128_pd256(lower_sin);
  return _mm256_insertf128_pd(widened, upper_sin, 1);
}
// Cosine of a Packet4d for AVX-only builds: evaluate each 128-bit half with the
// two-lane pcos overload and reassemble the 256-bit result.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d pcos<Packet4d>(const Packet4d& x) {
  const Packet2d lower_half = _mm256_castpd256_pd128(x);
  const Packet2d upper_half = _mm256_extractf128_pd(x, 1);

  const Packet2d lower_cos = pcos(lower_half);
  const Packet2d upper_cos = pcos(upper_half);

  // Widen the low result to 256 bits, then drop the high result into lane 1.
  const Packet4d widened = _mm256_castpd128_pd256(lower_cos);
  return _mm256_insertf128_pd(widened, upper_cos, 1);
}
// Tangent of a Packet4d for AVX-only builds: evaluate each 128-bit half with
// the two-lane ptan overload and reassemble the 256-bit result.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d ptan<Packet4d>(const Packet4d& x) {
  const Packet2d lower_half = _mm256_castpd256_pd128(x);
  const Packet2d upper_half = _mm256_extractf128_pd(x, 1);

  const Packet2d lower_tan = ptan(lower_half);
  const Packet2d upper_tan = ptan(upper_half);

  // Widen the low result to 256 bits, then drop the high result into lane 1.
  const Packet4d widened = _mm256_castpd128_pd256(lower_tan);
  return _mm256_insertf128_pd(widened, upper_tan, 1);
}
#endif
EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet4d)
EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet4d)