Fix NEON sqrt for 32-bit, add prsqrt.

With !406, we accidentally broke ARM 32-bit NEON builds, since
`vsqrt_f32` is only available for 64-bit (AArch64).

Here we add back the `rsqrt` implementation for 32-bit, relying
on a `prsqrt` implementation with better handling of edge cases.

Note that several of the 32-bit NEON packet tests are currently
failing - either due to denormal handling (NEON versions flush
to zero, but scalar paths don't) or due to accuracy (e.g. sin/cos).
This commit is contained in:
Antonio Sanchez
2021-02-26 13:59:46 -08:00
parent fe19714f80
commit 29ebd84cb7
3 changed files with 51 additions and 2 deletions

View File

@@ -504,6 +504,7 @@ void packetmath() {
data1[i] = numext::abs(internal::random<Scalar>());
}
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, numext::rsqrt, internal::prsqrt);
}
// Notice that this definition works for complex types as well.
@@ -532,7 +533,7 @@ void packetmath_real() {
CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
CHECK_CWISE1_IF(PacketTraits::HasLog, log2, internal::plog2);
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, 1 / std::sqrt, internal::prsqrt);
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, numext::rsqrt, internal::prsqrt);
for (int i = 0; i < size; ++i) {
data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-3, 3)));