mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
1. Fix a bug in psqrt and make it return 0 for +inf arguments.
2. Simplify handling of special cases by taking advantage of the fact that the builtin vrsqrt approximation handles negative, zero and +inf arguments correctly. This speeds up the SSE and AVX implementations by ~20%. 3. Make the Newton-Raphson formula used for rsqrt more numerically robust: Before: y = y * (1.5 - x/2 * y^2) After: y = y * (1.5 - y * (x/2) * y) Forming y^2 can overflow for very large or very small (denormalized) values of x, while x*y ~= 1. For AVX512, this makes it possible to compute accurate results for denormal inputs down to ~1e-42 in single precision. 4. Add a faster double precision implementation for Knights Landing using the vrsqrt28 instruction and a single Newton-Raphson iteration. Benchmark results: https://bitbucket.org/snippets/rmlarsen/5LBq9o
This commit is contained in:
@@ -605,9 +605,8 @@ template<typename Scalar,typename Packet> void packetmath_real()
|
||||
}
|
||||
|
||||
if(internal::random<float>(0,1)<0.1f)
|
||||
data1[internal::random<int>(0, PacketSize)] = 0;
|
||||
data1[internal::random<int>(0, PacketSize)] = 0;
|
||||
CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasSqrt, Scalar(1)/std::sqrt, internal::prsqrt);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::bessel_i0, internal::pbessel_i0);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::bessel_i0e, internal::pbessel_i0e);
|
||||
@@ -616,6 +615,9 @@ template<typename Scalar,typename Packet> void packetmath_real()
|
||||
CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::bessel_j0, internal::pbessel_j0);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::bessel_j1, internal::pbessel_j1);
|
||||
|
||||
data1[0] = std::numeric_limits<Scalar>::infinity();
|
||||
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, Scalar(1)/std::sqrt, internal::prsqrt);
|
||||
|
||||
// Use a smaller data range for the positive bessel operations as these
|
||||
// can have much more error at very small and very large values.
|
||||
for (int i=0; i<size; ++i) {
|
||||
|
||||
Reference in New Issue
Block a user