Faster conversion from integer types to bfloat16

Specialized `bfloat16_impl::float_to_bfloat16_rtne(float)` for normal floating-point numbers, infinity and zero, in order to improve the performance of `bfloat16::bfloat16(const T&)` for integer argument types. With Visual C++ 2019 on Windows 10, this was observed to reduce the runtime of conversion from int to bfloat16 by more than 20%.
2026-04-10 11:34:33 +08:00 · 2020-07-14 23:22:34 +02:00
parent acab22c205
commit 0e1a33a461
2 changed files with 37 additions and 11 deletions
--- a/test/bfloat16_float.cpp
+++ b/test/bfloat16_float.cpp
@@ -31,7 +31,7 @@ float BinaryToFloat(uint32_t sign, uint32_t exponent, uint32_t high_mantissa,

 void test_truncate(float input, float expected_truncation, float expected_rounding){
  bfloat16 truncated = Eigen::bfloat16_impl::truncate_to_bfloat16(input);
-  bfloat16 rounded = Eigen::bfloat16_impl::float_to_bfloat16_rtne(input);
+  bfloat16 rounded = Eigen::bfloat16_impl::float_to_bfloat16_rtne<false>(input);
  if ((numext::isnan)(input)){
    VERIFY((numext::isnan)(static_cast<float>(truncated)) || (numext::isinf)(static_cast<float>(truncated)));
    VERIFY((numext::isnan)(static_cast<float>(rounded)) || (numext::isinf)(static_cast<float>(rounded)));
@@ -93,7 +93,7 @@ void test_conversion()
    } else {
      VERIFY_IS_EQUAL(bf_trunc.value, 0x0000);
    }
-    bfloat16 bf_round = Eigen::bfloat16_impl::float_to_bfloat16_rtne(denorm);
+    bfloat16 bf_round = Eigen::bfloat16_impl::float_to_bfloat16_rtne<false>(denorm);
    VERIFY_IS_EQUAL(static_cast<float>(bf_round), 0.0f);
    if (std::signbit(denorm)) {
      VERIFY_IS_EQUAL(bf_round.value, 0x8000);