mirror of https://github.com/xemu-project/xemu.git
softfloat: Define operations for bfloat16
This patch implements operations for bfloat16 except conversion and some misc operations. We also add FloatFmt and pack/unpack interfaces for bfloat16. As they are both static fields, we can't make a separate patch for them. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20200813071421.2509-2-zhiwei_liu@c-sky.com> [rth: Use FloatRelation for comparison operations.] Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
a03e924cf8
commit
8282310d85
168
fpu/softfloat.c
168
fpu/softfloat.c
|
@ -554,6 +554,10 @@ static const FloatFmt float16_params_ahp = {
|
||||||
.arm_althp = true
|
.arm_althp = true
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const FloatFmt bfloat16_params = {
|
||||||
|
FLOAT_PARAMS(8, 7)
|
||||||
|
};
|
||||||
|
|
||||||
static const FloatFmt float32_params = {
|
static const FloatFmt float32_params = {
|
||||||
FLOAT_PARAMS(8, 23)
|
FLOAT_PARAMS(8, 23)
|
||||||
};
|
};
|
||||||
|
@ -580,6 +584,11 @@ static inline FloatParts float16_unpack_raw(float16 f)
|
||||||
return unpack_raw(float16_params, f);
|
return unpack_raw(float16_params, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Split the raw bfloat16 bit pattern `f' into FloatParts using bfloat16_params. */
static inline FloatParts bfloat16_unpack_raw(bfloat16 f)
{
    FloatParts parts = unpack_raw(bfloat16_params, f);

    return parts;
}
|
||||||
|
|
||||||
static inline FloatParts float32_unpack_raw(float32 f)
|
static inline FloatParts float32_unpack_raw(float32 f)
|
||||||
{
|
{
|
||||||
return unpack_raw(float32_params, f);
|
return unpack_raw(float32_params, f);
|
||||||
|
@ -603,6 +612,11 @@ static inline float16 float16_pack_raw(FloatParts p)
|
||||||
return make_float16(pack_raw(float16_params, p));
|
return make_float16(pack_raw(float16_params, p));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Reassemble FloatParts `p' into a raw bfloat16 bit pattern using bfloat16_params. */
static inline bfloat16 bfloat16_pack_raw(FloatParts p)
{
    bfloat16 bits = pack_raw(bfloat16_params, p);

    return bits;
}
|
||||||
|
|
||||||
static inline float32 float32_pack_raw(FloatParts p)
|
static inline float32 float32_pack_raw(FloatParts p)
|
||||||
{
|
{
|
||||||
return make_float32(pack_raw(float32_params, p));
|
return make_float32(pack_raw(float32_params, p));
|
||||||
|
@ -820,6 +834,11 @@ static FloatParts float16_unpack_canonical(float16 f, float_status *s)
|
||||||
return float16a_unpack_canonical(f, s, &float16_params);
|
return float16a_unpack_canonical(f, s, &float16_params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Unpack the bfloat16 value `f' and run the raw parts through
 * sf_canonicalize() with the bfloat16 format and status `s'.
 */
static FloatParts bfloat16_unpack_canonical(bfloat16 f, float_status *s)
{
    FloatParts raw = bfloat16_unpack_raw(f);

    return sf_canonicalize(raw, &bfloat16_params, s);
}
|
||||||
|
|
||||||
static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
|
static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
|
||||||
const FloatFmt *params)
|
const FloatFmt *params)
|
||||||
{
|
{
|
||||||
|
@ -831,6 +850,11 @@ static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
|
||||||
return float16a_round_pack_canonical(p, s, &float16_params);
|
return float16a_round_pack_canonical(p, s, &float16_params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Pass the parts `p' through round_canonical() for the bfloat16 format
 * and pack the result into a raw bfloat16.
 */
static bfloat16 bfloat16_round_pack_canonical(FloatParts p, float_status *s)
{
    FloatParts rounded = round_canonical(p, s, &bfloat16_params);

    return bfloat16_pack_raw(rounded);
}
|
||||||
|
|
||||||
static FloatParts float32_unpack_canonical(float32 f, float_status *s)
|
static FloatParts float32_unpack_canonical(float32 f, float_status *s)
|
||||||
{
|
{
|
||||||
return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
|
return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
|
||||||
|
@ -1158,6 +1182,28 @@ float64_sub(float64 a, float64 b, float_status *s)
|
||||||
return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
|
return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the result of adding or subtracting the bfloat16
|
||||||
|
* values `a' and `b'.
|
||||||
|
*/
|
||||||
|
bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
|
||||||
|
{
|
||||||
|
FloatParts pa = bfloat16_unpack_canonical(a, status);
|
||||||
|
FloatParts pb = bfloat16_unpack_canonical(b, status);
|
||||||
|
FloatParts pr = addsub_floats(pa, pb, false, status);
|
||||||
|
|
||||||
|
return bfloat16_round_pack_canonical(pr, status);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Returns the result of subtracting the bfloat16 value `b' from `a'.
 */
bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts lhs = bfloat16_unpack_canonical(a, status);
    FloatParts rhs = bfloat16_unpack_canonical(b, status);

    /* `true' is the add/sub selector; bfloat16_add passes `false'. */
    return bfloat16_round_pack_canonical(addsub_floats(lhs, rhs, true, status),
                                         status);
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns the result of multiplying the floating-point values `a' and
|
* Returns the result of multiplying the floating-point values `a' and
|
||||||
* `b'. The operation is performed according to the IEC/IEEE Standard
|
* `b'. The operation is performed according to the IEC/IEEE Standard
|
||||||
|
@ -1260,6 +1306,20 @@ float64_mul(float64 a, float64 b, float_status *s)
|
||||||
f64_is_zon2, f64_addsubmul_post);
|
f64_is_zon2, f64_addsubmul_post);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the result of multiplying the bfloat16
|
||||||
|
* values `a' and `b'.
|
||||||
|
*/
|
||||||
|
|
||||||
|
bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
|
||||||
|
{
|
||||||
|
FloatParts pa = bfloat16_unpack_canonical(a, status);
|
||||||
|
FloatParts pb = bfloat16_unpack_canonical(b, status);
|
||||||
|
FloatParts pr = mul_floats(pa, pb, status);
|
||||||
|
|
||||||
|
return bfloat16_round_pack_canonical(pr, status);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns the result of multiplying the floating-point values `a' and
|
* Returns the result of multiplying the floating-point values `a' and
|
||||||
* `b' then adding 'c', with no intermediate rounding step after the
|
* `b' then adding 'c', with no intermediate rounding step after the
|
||||||
|
@ -1642,6 +1702,23 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
|
||||||
return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
|
return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the result of multiplying the bfloat16 values `a'
|
||||||
|
* and `b' then adding 'c', with no intermediate rounding step after the
|
||||||
|
* multiplication.
|
||||||
|
*/
|
||||||
|
|
||||||
|
bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
|
||||||
|
int flags, float_status *status)
|
||||||
|
{
|
||||||
|
FloatParts pa = bfloat16_unpack_canonical(a, status);
|
||||||
|
FloatParts pb = bfloat16_unpack_canonical(b, status);
|
||||||
|
FloatParts pc = bfloat16_unpack_canonical(c, status);
|
||||||
|
FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
|
||||||
|
|
||||||
|
return bfloat16_round_pack_canonical(pr, status);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns the result of dividing the floating-point value `a' by the
|
* Returns the result of dividing the floating-point value `a' by the
|
||||||
* corresponding value `b'. The operation is performed according to
|
* corresponding value `b'. The operation is performed according to
|
||||||
|
@ -1808,6 +1885,20 @@ float64_div(float64 a, float64 b, float_status *s)
|
||||||
f64_div_pre, f64_div_post);
|
f64_div_pre, f64_div_post);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the result of dividing the bfloat16
|
||||||
|
* value `a' by the corresponding value `b'.
|
||||||
|
*/
|
||||||
|
|
||||||
|
bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
|
||||||
|
{
|
||||||
|
FloatParts pa = bfloat16_unpack_canonical(a, status);
|
||||||
|
FloatParts pb = bfloat16_unpack_canonical(b, status);
|
||||||
|
FloatParts pr = div_floats(pa, pb, status);
|
||||||
|
|
||||||
|
return bfloat16_round_pack_canonical(pr, status);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Float to Float conversions
|
* Float to Float conversions
|
||||||
*
|
*
|
||||||
|
@ -2881,6 +2972,25 @@ MINMAX(64, maxnummag, false, true, true)
|
||||||
|
|
||||||
#undef MINMAX
|
#undef MINMAX
|
||||||
|
|
||||||
|
#define BF16_MINMAX(name, ismin, isiee, ismag) \
|
||||||
|
bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
|
||||||
|
{ \
|
||||||
|
FloatParts pa = bfloat16_unpack_canonical(a, s); \
|
||||||
|
FloatParts pb = bfloat16_unpack_canonical(b, s); \
|
||||||
|
FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
|
||||||
|
\
|
||||||
|
return bfloat16_round_pack_canonical(pr, s); \
|
||||||
|
}
|
||||||
|
|
||||||
|
BF16_MINMAX(min, true, false, false)
|
||||||
|
BF16_MINMAX(minnum, true, true, false)
|
||||||
|
BF16_MINMAX(minnummag, true, true, true)
|
||||||
|
BF16_MINMAX(max, false, false, false)
|
||||||
|
BF16_MINMAX(maxnum, false, true, false)
|
||||||
|
BF16_MINMAX(maxnummag, false, true, true)
|
||||||
|
|
||||||
|
#undef BF16_MINMAX
|
||||||
|
|
||||||
/* Floating point compare */
|
/* Floating point compare */
|
||||||
static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
|
static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
|
||||||
float_status *s)
|
float_status *s)
|
||||||
|
@ -3042,6 +3152,24 @@ FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
|
||||||
return f64_compare(a, b, true, s);
|
return f64_compare(a, b, true, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static FloatRelation QEMU_FLATTEN
|
||||||
|
soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
|
||||||
|
{
|
||||||
|
FloatParts pa = bfloat16_unpack_canonical(a, s);
|
||||||
|
FloatParts pb = bfloat16_unpack_canonical(b, s);
|
||||||
|
return compare_floats(pa, pb, is_quiet, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compare `a' and `b' with is_quiet == false. */
FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    const bool is_quiet = false;

    return soft_bf16_compare(a, b, is_quiet, s);
}
|
||||||
|
|
||||||
|
/* Compare `a' and `b' with is_quiet == true. */
FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    const bool is_quiet = true;

    return soft_bf16_compare(a, b, is_quiet, s);
}
|
||||||
|
|
||||||
/* Multiply A by 2 raised to the power N. */
|
/* Multiply A by 2 raised to the power N. */
|
||||||
static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
|
static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
|
||||||
{
|
{
|
||||||
|
@ -3081,6 +3209,13 @@ float64 float64_scalbn(float64 a, int n, float_status *status)
|
||||||
return float64_round_pack_canonical(pr, status);
|
return float64_round_pack_canonical(pr, status);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Multiply the bfloat16 value `a' by 2 raised to the power `n'. */
bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts value = bfloat16_unpack_canonical(a, status);

    return bfloat16_round_pack_canonical(scalbn_decomposed(value, n, status),
                                         status);
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Square Root
|
* Square Root
|
||||||
*
|
*
|
||||||
|
@ -3231,6 +3366,13 @@ float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
|
||||||
return soft_f64_sqrt(ua.s, s);
|
return soft_f64_sqrt(ua.s, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Returns the square root of the bfloat16 value `a'. */
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts operand = bfloat16_unpack_canonical(a, status);

    return bfloat16_round_pack_canonical(
        sqrt_float(operand, status, &bfloat16_params), status);
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| The pattern for a default generated NaN.
|
| The pattern for a default generated NaN.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
@ -3273,6 +3415,13 @@ float128 float128_default_nan(float_status *status)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Returns the default NaN as a bfloat16: the parts from
 * parts_default_nan() have their fraction shifted right by the bfloat16
 * frac_shift before being packed raw.
 */
bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts dflt = parts_default_nan(status);

    dflt.frac = dflt.frac >> bfloat16_params.frac_shift;
    return bfloat16_pack_raw(dflt);
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
|
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
@ -3304,6 +3453,14 @@ float64 float64_silence_nan(float64 a, float_status *status)
|
||||||
return float64_pack_raw(p);
|
return float64_pack_raw(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Returns the bfloat16 `a' after passing it through parts_silence_nan().
 * The raw fraction is shifted left by frac_shift before that call and
 * shifted back afterwards so the result can be packed raw.
 */
bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    const int shift = bfloat16_params.frac_shift;
    FloatParts parts = bfloat16_unpack_raw(a);

    parts.frac = parts.frac << shift;
    parts = parts_silence_nan(parts, status);
    parts.frac = parts.frac >> shift;

    return bfloat16_pack_raw(parts);
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| If `a' is denormal and we are in flush-to-zero mode then set the
|
| If `a' is denormal and we are in flush-to-zero mode then set the
|
||||||
|
@ -3353,6 +3510,17 @@ float64 float64_squash_input_denormal(float64 a, float_status *status)
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * When status->flush_inputs_to_zero is set and parts_squash_denormal()
 * reports true for `a', return a zero carrying the sign of `a'.
 * Otherwise return `a' unchanged.
 */
bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    FloatParts parts;

    if (!status->flush_inputs_to_zero) {
        return a;
    }

    parts = bfloat16_unpack_raw(a);
    if (!parts_squash_denormal(parts, status)) {
        return a;
    }

    return bfloat16_set_sign(bfloat16_zero, parts.sign);
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
|
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
|
||||||
| and 7, and returns the properly rounded 32-bit integer corresponding to the
|
| and 7, and returns the properly rounded 32-bit integer corresponding to the
|
||||||
|
|
|
@ -112,6 +112,11 @@ typedef struct {
|
||||||
#define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ })
|
#define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ })
|
||||||
#define make_float128_init(high_, low_) { .high = high_, .low = low_ }
|
#define make_float128_init(high_, low_) { .high = high_, .low = low_ }
|
||||||
|
|
||||||
|
/*
 * Software neural-network floating-point types.
 *
 * A bfloat16 value is carried as its raw 16-bit pattern.
 */
typedef uint16_t bfloat16;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Software IEC/IEEE floating-point underflow tininess-detection mode.
|
* Software IEC/IEEE floating-point underflow tininess-detection mode.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -109,6 +109,7 @@ void float_raise(uint8_t flags, float_status *status);
|
||||||
float16 float16_squash_input_denormal(float16 a, float_status *status);
|
float16 float16_squash_input_denormal(float16 a, float_status *status);
|
||||||
float32 float32_squash_input_denormal(float32 a, float_status *status);
|
float32 float32_squash_input_denormal(float32 a, float_status *status);
|
||||||
float64 float64_squash_input_denormal(float64 a, float_status *status);
|
float64 float64_squash_input_denormal(float64 a, float_status *status);
|
||||||
|
bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Options to indicate which negations to perform in float*_muladd()
|
| Options to indicate which negations to perform in float*_muladd()
|
||||||
|
@ -347,6 +348,43 @@ static inline bool float16_unordered_quiet(float16 a, float16 b,
|
||||||
#define float16_three make_float16(0x4200)
|
#define float16_three make_float16(0x4200)
|
||||||
#define float16_infinity make_float16(0x7c00)
|
#define float16_infinity make_float16(0x7c00)
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------------
|
||||||
|
| Software bfloat16 operations.
|
||||||
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
bfloat16 bfloat16_add(bfloat16, bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_sub(bfloat16, bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_mul(bfloat16, bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_div(bfloat16, bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_muladd(bfloat16, bfloat16, bfloat16, int,
|
||||||
|
float_status *status);
|
||||||
|
float16 bfloat16_scalbn(bfloat16, int, float_status *status);
|
||||||
|
bfloat16 bfloat16_min(bfloat16, bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_max(bfloat16, bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_minnum(bfloat16, bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_maxnum(bfloat16, bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_minnummag(bfloat16, bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_maxnummag(bfloat16, bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_sqrt(bfloat16, float_status *status);
|
||||||
|
FloatRelation bfloat16_compare(bfloat16, bfloat16, float_status *status);
|
||||||
|
FloatRelation bfloat16_compare_quiet(bfloat16, bfloat16, float_status *status);
|
||||||
|
|
||||||
|
bfloat16 bfloat16_silence_nan(bfloat16, float_status *status);
|
||||||
|
bfloat16 bfloat16_default_nan(float_status *status);
|
||||||
|
|
||||||
|
/*
 * Returns `a' with bit 15 replaced by `sign'; the low 15 bits are kept.
 * NOTE(review): callers are presumably expected to pass 0 or 1 for
 * `sign' -- confirm.
 */
static inline bfloat16 bfloat16_set_sign(bfloat16 a, int sign)
{
    const int low_bits = a & 0x7fff;
    const int sign_bit = sign << 15;

    return low_bits | sign_bit;
}
|
||||||
|
|
||||||
|
/* Raw bfloat16 bit patterns for commonly used constants. */
#define bfloat16_zero 0                 /* +0.0 */
#define bfloat16_half 0x3f00            /* 0.5 */
#define bfloat16_one 0x3f80             /* 1.0 */
#define bfloat16_one_point_five 0x3fc0  /* 1.5 */
#define bfloat16_two 0x4000             /* 2.0 */
#define bfloat16_three 0x4040           /* 3.0 */
#define bfloat16_infinity 0x7f80        /* +infinity */
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| The pattern for a default generated half-precision NaN.
|
| The pattern for a default generated half-precision NaN.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
Loading…
Reference in New Issue