Faster conversion from integer types to bfloat16

Specialized `bfloat16_impl::float_to_bfloat16_rtne(float)` for normal floating-point numbers, infinity, and zero, to improve the performance of `bfloat16::bfloat16(const T&)` for integer argument types.

A reduction of more than 20% in the runtime of conversion from int to bfloat16 was observed, using Visual C++ 2019 on Windows 10.
This commit is contained in:
Niels Dekker
2020-07-14 23:22:34 +02:00
parent acab22c205
commit 0e1a33a461
2 changed files with 37 additions and 11 deletions

View File

@@ -31,7 +31,7 @@ float BinaryToFloat(uint32_t sign, uint32_t exponent, uint32_t high_mantissa,
void test_truncate(float input, float expected_truncation, float expected_rounding){
bfloat16 truncated = Eigen::bfloat16_impl::truncate_to_bfloat16(input);
bfloat16 rounded = Eigen::bfloat16_impl::float_to_bfloat16_rtne(input);
bfloat16 rounded = Eigen::bfloat16_impl::float_to_bfloat16_rtne<false>(input);
if ((numext::isnan)(input)){
VERIFY((numext::isnan)(static_cast<float>(truncated)) || (numext::isinf)(static_cast<float>(truncated)));
VERIFY((numext::isnan)(static_cast<float>(rounded)) || (numext::isinf)(static_cast<float>(rounded)));
@@ -93,7 +93,7 @@ void test_conversion()
} else {
VERIFY_IS_EQUAL(bf_trunc.value, 0x0000);
}
bfloat16 bf_round = Eigen::bfloat16_impl::float_to_bfloat16_rtne(denorm);
bfloat16 bf_round = Eigen::bfloat16_impl::float_to_bfloat16_rtne<false>(denorm);
VERIFY_IS_EQUAL(static_cast<float>(bf_round), 0.0f);
if (std::signbit(denorm)) {
VERIFY_IS_EQUAL(bf_round.value, 0x8000);