Add reciprocal packet op and fast specializations for float with SSE, AVX, and AVX512.

This commit is contained in:
Rasmus Munk Larsen
2022-01-21 23:49:18 +00:00
parent 4b0926f99b
commit ea2c02060c
14 changed files with 95 additions and 63 deletions

View File

@@ -28,6 +28,10 @@ inline T REF_DIV(const T& a, const T& b) {
return a / b;
}
// Scalar reference for the reciprocal: divides T(1) by `a` using T's own
// division operator. No special handling is applied for a == 0; the result is
// whatever T's operator/ produces.
template <typename T>
inline T REF_RECIPROCAL(const T& a) {
  const T one = T(1);
  return one / a;
}
// Scalar reference for the absolute difference of `a` and `b`.
// The larger operand is always the minuend, so the subtraction never goes
// below zero (which also keeps it from wrapping when T is unsigned).
template <typename T>
inline T REF_ABS_DIFF(const T& a, const T& b) {
  if (a > b) {
    return a - b;
  }
  return b - a;
}
@@ -464,9 +468,11 @@ void packetmath() {
CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
if (PacketTraits::HasNegate) CHECK_CWISE1(internal::negate, internal::pnegate);
CHECK_CWISE1_IF(PacketTraits::HasNegate, internal::negate, internal::pnegate);
CHECK_CWISE1_IF(PacketTraits::HasReciprocal, REF_RECIPROCAL, internal::preciprocal);
CHECK_CWISE1(numext::conj, internal::pconj);
for (int offset = 0; offset < 3; ++offset) {
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[offset];
internal::pstore(data2, internal::pset1<Packet>(data1[offset]));