softfloat: Move muladd_floats to softfloat-parts.c.inc
Rename to parts$N_muladd. Implement float128_muladd with FloatParts128.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
parent aca845275a
commit dedd123c56

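With this change, float128_muladd gains the same fused-multiply-add flags already offered by the narrower widths. A minimal caller-side sketch, assuming it is compiled inside the QEMU tree where "fpu/softfloat.h" is on the include path; the wrapper name fused_nmsub128 is hypothetical and not part of this commit:

```c
#include "qemu/osdep.h"
#include "fpu/softfloat.h"

/*
 * Compute -(a * b) + c in a single fused step.  Using the
 * float_muladd_negate_product flag (rather than negating the result
 * afterwards) avoids flipping the sign bit of any NaN that comes out,
 * per the comment on parts$N_muladd in the diff below.
 */
static float128 fused_nmsub128(float128 a, float128 b, float128 c,
                               float_status *st)
{
    return float128_muladd(a, b, c, float_muladd_negate_product, st);
}
```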
fpu/softfloat-parts.c.inc

@@ -413,3 +413,129 @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
    a->sign = sign;
    return a;
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                goto d_nan;
            }
            goto return_inf;
        }

        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
    return a;
}
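The core of the new path is a widening multiply followed by a narrow-with-sticky step before rounding. A standalone sketch of that pattern on plain 64-bit words (hypothetical helper names, relying on the GCC/Clang unsigned __int128 extension; the real code uses frac_mulw and frac_truncjam on a double-width FloatPartsW):

```c
#include <stdint.h>

/* 64 x 64 -> 128-bit product kept as a hi/lo pair, like frac_mulw
 * filling a double-width fraction. */
static void mul_widen64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
    unsigned __int128 p = (unsigned __int128)a * b;
    *hi = (uint64_t)(p >> 64);
    *lo = (uint64_t)p;
}

/* Narrow back to one word, ORing any nonzero discarded bits into the
 * least significant bit ("jamming" the sticky bit), like frac_truncjam.
 * Keeping that one bit is enough for the later rounding step to tell an
 * exact result from an inexact one. */
static uint64_t trunc_jam64(uint64_t hi, uint64_t lo)
{
    return hi | (lo != 0);
}
```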
fpu/softfloat.c | 406
@@ -716,6 +716,10 @@ static float128 float128_pack_raw(const FloatParts128 *p)
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
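PARTS_GENERIC_64_128_256 extends the size-generic dispatch to the new 256-bit parts type. A rough stand-alone illustration of the idea using plain C11 _Generic (toy type and function names, not QEMU code; QEMU_GENERIC provides roughly this selection, with fallbacks for older compilers):

```c
typedef struct { unsigned long frac;    } Parts64;
typedef struct { unsigned long frac[2]; } Parts128;
typedef struct { unsigned long frac[4]; } Parts256;

static void parts64_clear(Parts64 *p)   { p->frac = 0; }
static void parts128_clear(Parts128 *p) { p->frac[0] = p->frac[1] = 0; }
static void parts256_clear(Parts256 *p)
{
    for (int i = 0; i < 4; i++) {
        p->frac[i] = 0;
    }
}

/* Pick the implementation from the pointer type of the argument. */
#define parts_clear(P)                        \
    _Generic((P),                             \
             Parts256 *: parts256_clear,      \
             Parts128 *: parts128_clear,      \
             default:    parts64_clear)(P)
```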
@@ -761,15 +765,17 @@ static void parts128_uncanon(FloatParts128 *p, float_status *status,

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128(add_normal, A)(A, B)
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128(sub_normal, A)(A, B)
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
@@ -787,6 +793,16 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */
@@ -794,6 +810,10 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
@@ -807,7 +827,17 @@ static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
    return c;
}

#define frac_add(R, A, B) FRAC_GENERIC_64_128(add, R)(R, A, B)
static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
@@ -902,7 +932,16 @@ static void frac128_neg(FloatParts128 *a)
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A) FRAC_GENERIC_64_128(neg, A)(A)
static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
@@ -933,7 +972,55 @@ static int frac128_normalize(FloatParts128 *a)
    return 128;
}

#define frac_normalize(A) FRAC_GENERIC_64_128(normalize, A)(A)
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl, shr;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    shr = -shl & 63;
    a0 = (a0 << shl) | (a1 >> shr);
    a1 = (a1 << shl) | (a2 >> shr);
    a2 = (a2 << shl) | (a3 >> shr);
    a3 = (a3 << shl);

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)

static void frac64_shl(FloatParts64 *a, int c)
{
@@ -969,7 +1056,51 @@ static void frac128_shrjam(FloatParts128 *a, int c)
    shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
}

#define frac_shrjam(A, C) FRAC_GENERIC_64_128(shrjam, A)(A, C)
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;
    int invc;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    invc = -c & 63;
    sticky |= a3 << invc;
    a3 = (a3 >> c) | (a2 << invc);
    a2 = (a2 >> c) | (a1 << invc);
    a1 = (a1 >> c) | (a0 << invc);
    a0 = (a0 >> c);

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)

static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
@@ -984,7 +1115,17 @@ static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
    return c;
}

#define frac_sub(R, A, B) FRAC_GENERIC_64_128(sub, R)(R, A, B)
static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
@@ -999,6 +1140,22 @@ static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)

#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)

#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN glue(FloatParts,N)
#define FloatPartsW glue(FloatParts,W)
@@ -1017,6 +1174,12 @@ static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef N
#undef W
#define N 256

#include "softfloat-parts-addsub.c.inc"

#undef N
#undef W
#undef partsN
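The 256-bit helpers are then stamped out by re-including the parts template with N redefined; partsN(), FloatPartsN and FloatPartsW expand to the right identifiers via token pasting. A toy version of the same preprocessor-template trick, with GLUE standing in for QEMU's glue() macro and the function name chosen only for illustration:

```c
#include <stdio.h>

#define GLUE_(a, b) a##b
#define GLUE(a, b)  GLUE_(a, b)

#define N 64
#define partsN(NAME) GLUE(GLUE(GLUE(parts, N), _), NAME)

/* Expands to parts64_report; redefining N and re-including a template
 * file would stamp out parts128_report, parts256_report, and so on. */
static void partsN(report)(void)
{
    printf("instantiated for N=%d\n", N);
}

int main(void)
{
    parts64_report();
    return 0;
}
```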
@@ -1387,230 +1550,48 @@ float128_mul(float128 a, float128 b, float_status *status)
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on
 * NaNs.)
 * Fused multiply-add
 */

static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,
                                  int flags, float_status *s)
{
    bool inf_zero, p_sign;
    bool sign_flip = flags & float_muladd_negate_result;
    FloatClass p_class;
    uint64_t hi, lo;
    int p_exp;
    int ab_mask, abc_mask;

    ab_mask = float_cmask(a.cls) | float_cmask(b.cls);
    abc_mask = float_cmask(c.cls) | ab_mask;
    inf_zero = ab_mask == float_cmask_infzero;

    /* It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return *parts_pick_nan_muladd(&a, &b, &c, s, ab_mask, abc_mask);
    }

    if (inf_zero) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }

    if (flags & float_muladd_negate_c) {
        c.sign ^= 1;
    }

    p_sign = a.sign ^ b.sign;

    if (flags & float_muladd_negate_product) {
        p_sign ^= 1;
    }

    if (ab_mask & float_cmask_inf) {
        p_class = float_class_inf;
    } else if (ab_mask & float_cmask_zero) {
        p_class = float_class_zero;
    } else {
        p_class = float_class_normal;
    }

    if (c.cls == float_class_inf) {
        if (p_class == float_class_inf && p_sign != c.sign) {
            float_raise(float_flag_invalid, s);
            parts_default_nan(&c, s);
        } else {
            c.sign ^= sign_flip;
        }
        return c;
    }

    if (p_class == float_class_inf) {
        a.cls = float_class_inf;
        a.sign = p_sign ^ sign_flip;
        return a;
    }

    if (p_class == float_class_zero) {
        if (c.cls == float_class_zero) {
            if (p_sign != c.sign) {
                p_sign = s->float_rounding_mode == float_round_down;
            }
            c.sign = p_sign;
        } else if (flags & float_muladd_halve_result) {
            c.exp -= 1;
        }
        c.sign ^= sign_flip;
        return c;
    }

    /* a & b should be normals now... */
    assert(a.cls == float_class_normal &&
           b.cls == float_class_normal);

    p_exp = a.exp + b.exp;

    mul64To128(a.frac, b.frac, &hi, &lo);

    /* Renormalize to the msb. */
    if (hi & DECOMPOSED_IMPLICIT_BIT) {
        p_exp += 1;
    } else {
        shortShift128Left(hi, lo, 1, &hi, &lo);
    }

    /* + add/sub */
    if (c.cls != float_class_zero) {
        int exp_diff = p_exp - c.exp;
        if (p_sign == c.sign) {
            /* Addition */
            if (exp_diff <= 0) {
                shift64RightJamming(hi, -exp_diff, &hi);
                p_exp = c.exp;
                if (uadd64_overflow(hi, c.frac, &hi)) {
                    shift64RightJamming(hi, 1, &hi);
                    hi |= DECOMPOSED_IMPLICIT_BIT;
                    p_exp += 1;
                }
            } else {
                uint64_t c_hi, c_lo, over;
                shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);
                add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);
                if (over) {
                    shift64RightJamming(hi, 1, &hi);
                    hi |= DECOMPOSED_IMPLICIT_BIT;
                    p_exp += 1;
                }
            }
        } else {
            /* Subtraction */
            uint64_t c_hi = c.frac, c_lo = 0;

            if (exp_diff <= 0) {
                shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
                if (exp_diff == 0
                    &&
                    (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
                    sub128(hi, lo, c_hi, c_lo, &hi, &lo);
                } else {
                    sub128(c_hi, c_lo, hi, lo, &hi, &lo);
                    p_sign ^= 1;
                    p_exp = c.exp;
                }
            } else {
                shift128RightJamming(c_hi, c_lo,
                                     exp_diff,
                                     &c_hi, &c_lo);
                sub128(hi, lo, c_hi, c_lo, &hi, &lo);
            }

            if (hi == 0 && lo == 0) {
                a.cls = float_class_zero;
                a.sign = s->float_rounding_mode == float_round_down;
                a.sign ^= sign_flip;
                return a;
            } else {
                int shift;
                if (hi != 0) {
                    shift = clz64(hi);
                } else {
                    shift = clz64(lo) + 64;
                }
                /* Normalizing to a binary point of 124 is the
                   correct adjust for the exponent. However since we're
                   shifting, we might as well put the binary point back
                   at 63 where we really want it. Therefore shift as
                   if we're leaving 1 bit at the top of the word, but
                   adjust the exponent as if we're leaving 3 bits. */
                shift128Left(hi, lo, shift, &hi, &lo);
                p_exp -= shift;
            }
        }
    }
    hi |= (lo != 0);

    if (flags & float_muladd_halve_result) {
        p_exp -= 1;
    }

    /* finally prepare our result */
    a.cls = float_class_normal;
    a.sign = p_sign ^ sign_flip;
    a.exp = p_exp;
    a.frac = hi;

    return a;
}

float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                                int flags, float_status *status)
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, pr;
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = muladd_floats(pa, pb, pc, flags, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(&pr, status);
    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, pr;
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = muladd_floats(pa, pb, pc, flags, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(&pr, status);
    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, pr;
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = muladd_floats(pa, pb, pc, flags, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(&pr, status);
    return float64_round_pack_canonical(pr, status);
}

static bool force_soft_fma;
@@ -1757,23 +1738,30 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}

/*
 * Returns the result of multiplying the bfloat16 values `a'
 * and `b' then adding 'c', with no intermediate rounding step after the
 * multiplication.
 */

bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
                                      int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, pr;
    FloatParts64 pa, pb, pc, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    bfloat16_unpack_canonical(&pc, c, status);
    pr = muladd_floats(pa, pb, pc, flags, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return bfloat16_round_pack_canonical(&pr, status);
    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
                                      int flags, float_status *status)
{
    FloatParts128 pa, pb, pc, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    float128_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float128_round_pack_canonical(pr, status);
}

/*
include/fpu/softfloat.h

@@ -1197,6 +1197,8 @@ float128 float128_round_to_int(float128, float_status *status);
float128 float128_add(float128, float128, float_status *status);
float128 float128_sub(float128, float128, float_status *status);
float128 float128_mul(float128, float128, float_status *status);
float128 float128_muladd(float128, float128, float128, int,
                         float_status *status);
float128 float128_div(float128, float128, float_status *status);
float128 float128_rem(float128, float128, float_status *status);
float128 float128_sqrt(float128, float_status *status);
tests/fp/fp-bench.c

@@ -386,7 +386,7 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
    for (i = 0; i < OPS_PER_ITER; i++) {
        float128 a = ops[0].f128;
        float128 b = ops[1].f128;
        /* float128 c = ops[2].f128; */
        float128 c = ops[2].f128;

        switch (op) {
        case OP_ADD:
@@ -401,9 +401,9 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
        case OP_DIV:
            res.f128 = float128_div(a, b, &soft_status);
            break;
        /* case OP_FMA: */
        /*     res.f128 = float128_muladd(a, b, c, 0, &soft_status); */
        /*     break; */
        case OP_FMA:
            res.f128 = float128_muladd(a, b, c, 0, &soft_status);
            break;
        case OP_SQRT:
            res.f128 = float128_sqrt(a, &soft_status);
            break;
tests/fp/fp-test.c

@@ -717,7 +717,7 @@ static void do_testfloat(int op, int rmode, bool exact)
        test_abz_f128(true_abz_f128M, subj_abz_f128M);
        break;
    case F128_MULADD:
        not_implemented();
        test_abcz_f128(slow_f128M_mulAdd, qemu_f128M_mulAdd);
        break;
    case F128_SQRT:
        test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt);
tests/fp/wrap.c.inc

@@ -574,6 +574,18 @@ WRAP_MULADD(qemu_f32_mulAdd, float32_muladd, float32)
WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)
#undef WRAP_MULADD

static void qemu_f128M_mulAdd(const float128_t *ap, const float128_t *bp,
                              const float128_t *cp, float128_t *res)
{
    float128 a, b, c, ret;

    a = soft_to_qemu128(*ap);
    b = soft_to_qemu128(*bp);
    c = soft_to_qemu128(*cp);
    ret = float128_muladd(a, b, c, 0, &qsf);
    *res = qemu_to_soft128(ret);
}

#define WRAP_CMP16(name, func, retcond) \
static bool name(float16_t a, float16_t b) \
{ \