Use native _Float16 for AVX512FP16 and update vectorization.

This commit is contained in:
Antonio Sánchez
2025-03-19 19:55:26 +00:00
committed by Rasmus Munk Larsen
parent 0259a52b0e
commit 70f2aead9a
15 changed files with 1422 additions and 449 deletions

View File

@@ -26,19 +26,19 @@ inline T REF_MUL(const T& a, const T& b) {
}
// Reference result for the multiply-add check on operands (a, b, c).
//
// Forwards to internal::pmadd instead of spelling out `a * b + c`.
// NOTE(review): presumably this keeps the reference rounding identical to the
// implementation under test when pmadd is fused -- confirm against pmadd.
template <typename T>
inline T REF_MADD(const T& a, const T& b, const T& c) {
  return internal::pmadd(a, b, c);
}
// Reference result for the multiply-subtract check on operands (a, b, c).
//
// Forwards to internal::pmsub instead of spelling out `a * b - c`.
// NOTE(review): presumably this keeps the reference rounding identical to the
// implementation under test when pmsub is fused -- confirm against pmsub.
template <typename T>
inline T REF_MSUB(const T& a, const T& b, const T& c) {
  return internal::pmsub(a, b, c);
}
// Reference result for the negated multiply-add check on operands (a, b, c).
//
// Forwards to internal::pnmadd instead of spelling out `c - a * b`.
// NOTE(review): presumably this keeps the reference rounding identical to the
// implementation under test when pnmadd is fused -- confirm against pnmadd.
template <typename T>
inline T REF_NMADD(const T& a, const T& b, const T& c) {
  return internal::pnmadd(a, b, c);
}
// Reference result for the negated multiply-subtract check on operands (a, b, c).
//
// Forwards to internal::pnmsub instead of spelling out
// `test::negate(a * b + c)`.
// NOTE(review): presumably this keeps the reference rounding identical to the
// implementation under test when pnmsub is fused -- confirm against pnmsub.
template <typename T>
inline T REF_NMSUB(const T& a, const T& b, const T& c) {
  return internal::pnmsub(a, b, c);
}
template <typename T>
inline T REF_DIV(const T& a, const T& b) {