mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Fix AVX double-precision trig and complex exp without AVX2
libeigen/eigen!2147 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
@@ -454,7 +454,15 @@ EIGEN_STRONG_INLINE Packet4cf plog<Packet4cf>(const Packet4cf& a) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pexp<Packet2cd>(const Packet2cd& a) {
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
return pexp_complex<Packet2cd>(a);
|
||||
#else
|
||||
// Without AVX2, pexp_complex<Packet2cd> requires psincos_double<Packet4d> which needs
|
||||
// 256-bit integer operations (Packet4l) not available on AVX-only targets.
|
||||
// Process as two independent Packet1cd using the SSE implementation instead.
|
||||
return Packet2cd(_mm256_insertf128_pd(_mm256_castpd128_pd256(pexp(Packet1cd(_mm256_castpd256_pd128(a.v))).v),
|
||||
pexp(Packet1cd(_mm256_extractf128_pd(a.v, 1))).v, 1));
|
||||
#endif
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -33,6 +33,24 @@ EIGEN_DOUBLE_PACKET_FUNCTION(cbrt, Packet4d)
|
||||
// Packet4d sin/cos/tan generated through EIGEN_DOUBLE_PACKET_FUNCTION (macro defined
// elsewhere). These lines sit in the first branch of a preprocessor conditional whose
// opening directive is not visible here; the #else branch that follows is documented as
// the "without AVX2" path, so this branch is presumably the AVX2 one -- TODO confirm.
EIGEN_DOUBLE_PACKET_FUNCTION(sin, Packet4d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(cos, Packet4d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(tan, Packet4d)
|
||||
#else
|
||||
// Without AVX2, psincos_double<Packet4d> requires 256-bit integer operations (Packet4l)
|
||||
// that are not available. Process as two Packet2d halves using the SSE implementation.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d psin<Packet4d>(const Packet4d& x) {
|
||||
return _mm256_insertf128_pd(_mm256_castpd128_pd256(psin(_mm256_castpd256_pd128(x))),
|
||||
psin(_mm256_extractf128_pd(x, 1)), 1);
|
||||
}
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d pcos<Packet4d>(const Packet4d& x) {
|
||||
return _mm256_insertf128_pd(_mm256_castpd128_pd256(pcos(_mm256_castpd256_pd128(x))),
|
||||
pcos(_mm256_extractf128_pd(x, 1)), 1);
|
||||
}
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d ptan<Packet4d>(const Packet4d& x) {
|
||||
return _mm256_insertf128_pd(_mm256_castpd128_pd256(ptan(_mm256_castpd256_pd128(x))),
|
||||
ptan(_mm256_extractf128_pd(x, 1)), 1);
|
||||
}
|
||||
#endif
|
||||
// Packet4d atan and exp2 generated through EIGEN_GENERIC_PACKET_FUNCTION (macro defined
// elsewhere). These follow the #endif, so they are emitted for both branches of the
// preceding conditional.
EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet4d)
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet4d)
|
||||
|
||||
Reference in New Issue
Block a user