mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Implement vectorized complex square root.
Closes #1905

Measured speedup for sqrt of `complex<float>` on Skylake:

SSE:
```
name                        old time/op  new time/op  delta
BM_eigen_sqrt_ctype/1       49.4ns ± 0%  54.3ns ± 0%  +10.01%
BM_eigen_sqrt_ctype/8        332ns ± 0%    50ns ± 1%  -84.97%
BM_eigen_sqrt_ctype/64      2.81µs ± 1%  0.38µs ± 0%  -86.49%
BM_eigen_sqrt_ctype/512     23.8µs ± 0%   3.0µs ± 0%  -87.32%
BM_eigen_sqrt_ctype/4k       202µs ± 0%    24µs ± 2%  -88.03%
BM_eigen_sqrt_ctype/32k     1.63ms ± 0%  0.19ms ± 0%  -88.18%
BM_eigen_sqrt_ctype/256k    13.0ms ± 0%   1.5ms ± 1%  -88.20%
BM_eigen_sqrt_ctype/1M      52.1ms ± 0%   6.2ms ± 0%  -88.18%
```

AVX2:
```
name                        old cpu/op   new cpu/op   delta
BM_eigen_sqrt_ctype/1       53.6ns ± 0%  55.6ns ± 0%  +3.71%
BM_eigen_sqrt_ctype/8        334ns ± 0%    27ns ± 0%  -91.86%
BM_eigen_sqrt_ctype/64      2.79µs ± 0%  0.22µs ± 2%  -92.28%
BM_eigen_sqrt_ctype/512     23.8µs ± 1%   1.7µs ± 1%  -92.81%
BM_eigen_sqrt_ctype/4k       201µs ± 0%    14µs ± 1%  -93.24%
BM_eigen_sqrt_ctype/32k     1.62ms ± 0%  0.11ms ± 1%  -93.29%
BM_eigen_sqrt_ctype/256k    13.0ms ± 0%   0.9ms ± 1%  -93.31%
BM_eigen_sqrt_ctype/1M      52.0ms ± 0%   3.5ms ± 1%  -93.31%
```

AVX512:
```
name                        old cpu/op   new cpu/op   delta
BM_eigen_sqrt_ctype/1       53.7ns ± 0%  56.2ns ± 1%  +4.75%
BM_eigen_sqrt_ctype/8        334ns ± 0%    18ns ± 2%  -94.63%
BM_eigen_sqrt_ctype/64      2.79µs ± 0%  0.12µs ± 1%  -95.54%
BM_eigen_sqrt_ctype/512     23.9µs ± 1%   1.0µs ± 1%  -95.89%
BM_eigen_sqrt_ctype/4k       202µs ± 0%     8µs ± 1%  -96.13%
BM_eigen_sqrt_ctype/32k     1.63ms ± 0%  0.06ms ± 1%  -96.15%
BM_eigen_sqrt_ctype/256k    13.0ms ± 0%   0.5ms ± 4%  -96.11%
BM_eigen_sqrt_ctype/1M      52.1ms ± 0%   2.0ms ± 1%  -96.13%
```
This commit is contained in:
@@ -473,8 +473,6 @@ void packetmath() {
|
||||
CHECK_CWISE3_IF(true, internal::pselect, internal::pselect);
|
||||
}
|
||||
|
||||
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = internal::random<Scalar>();
|
||||
}
|
||||
@@ -486,6 +484,11 @@ void packetmath() {
|
||||
packetmath_boolean_mask_ops<Scalar, Packet>();
|
||||
packetmath_pcast_ops_runner<Scalar, Packet>::run();
|
||||
packetmath_minus_zero_add<Scalar, Packet>();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = numext::abs(internal::random<Scalar>());
|
||||
}
|
||||
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
|
||||
}
|
||||
|
||||
// Notice that this definition works for complex types as well.
|
||||
@@ -899,6 +902,8 @@ void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval) {
|
||||
|
||||
template <typename Scalar, typename Packet>
|
||||
void packetmath_complex() {
|
||||
typedef internal::packet_traits<Scalar> PacketTraits;
|
||||
typedef typename Scalar::value_type RealScalar;
|
||||
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
||||
|
||||
const int size = PacketSize * 4;
|
||||
@@ -917,11 +922,55 @@ void packetmath_complex() {
|
||||
test_conj_helper<Scalar, Packet, true, false>(data1, data2, ref, pval);
|
||||
test_conj_helper<Scalar, Packet, true, true>(data1, data2, ref, pval);
|
||||
|
||||
// Test pcplxflip.
|
||||
{
|
||||
for (int i = 0; i < PacketSize; ++i) ref[i] = Scalar(std::imag(data1[i]), std::real(data1[i]));
|
||||
internal::pstore(pval, internal::pcplxflip(internal::pload<Packet>(data1)));
|
||||
VERIFY(test::areApprox(ref, pval, PacketSize) && "pcplxflip");
|
||||
}
|
||||
|
||||
if (PacketTraits::HasSqrt) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = Scalar(internal::random<RealScalar>(), internal::random<RealScalar>());
|
||||
}
|
||||
CHECK_CWISE1(numext::sqrt, internal::psqrt);
|
||||
|
||||
// Test misc. corner cases.
|
||||
const RealScalar zero = RealScalar(0);
|
||||
const RealScalar one = RealScalar(1);
|
||||
const RealScalar inf = std::numeric_limits<RealScalar>::infinity();
|
||||
const RealScalar nan = std::numeric_limits<RealScalar>::quiet_NaN();
|
||||
data1[0] = Scalar(zero, zero);
|
||||
data1[1] = Scalar(-zero, zero);
|
||||
data1[2] = Scalar(one, zero);
|
||||
data1[3] = Scalar(zero, one);
|
||||
CHECK_CWISE1(numext::sqrt, internal::psqrt);
|
||||
data1[0] = Scalar(-one, zero);
|
||||
data1[1] = Scalar(zero, -one);
|
||||
data1[2] = Scalar(one, one);
|
||||
data1[3] = Scalar(-one, -one);
|
||||
CHECK_CWISE1(numext::sqrt, internal::psqrt);
|
||||
data1[0] = Scalar(inf, zero);
|
||||
data1[1] = Scalar(zero, inf);
|
||||
data1[2] = Scalar(-inf, zero);
|
||||
data1[3] = Scalar(zero, -inf);
|
||||
CHECK_CWISE1(numext::sqrt, internal::psqrt);
|
||||
data1[0] = Scalar(inf, inf);
|
||||
data1[1] = Scalar(-inf, inf);
|
||||
data1[2] = Scalar(inf, -inf);
|
||||
data1[3] = Scalar(-inf, -inf);
|
||||
CHECK_CWISE1(numext::sqrt, internal::psqrt);
|
||||
data1[0] = Scalar(nan, zero);
|
||||
data1[1] = Scalar(zero, nan);
|
||||
data1[2] = Scalar(nan, one);
|
||||
data1[3] = Scalar(one, nan);
|
||||
CHECK_CWISE1(numext::sqrt, internal::psqrt);
|
||||
data1[0] = Scalar(nan, nan);
|
||||
data1[1] = Scalar(inf, nan);
|
||||
data1[2] = Scalar(nan, inf);
|
||||
data1[3] = Scalar(-inf, nan);
|
||||
CHECK_CWISE1(numext::sqrt, internal::psqrt);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Scalar, typename Packet>
|
||||
|
||||
Reference in New Issue
Block a user