mirror of https://github.com/xemu-project/xemu.git
Reorg FloatParts to use QEMU_GENERIC.
Begin replacing the Berkeley float128 routines with FloatParts128.
  - includes a new implementation of float128_muladd
  - includes the snan silencing that was missing from
    float{32,64}_to_float128 and float128_to_float{32,64}.
  - does not include float128_min/max* (written but not yet reviewed).

-----BEGIN PGP SIGNATURE-----
iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmChD54dHHJpY2hhcmQu
aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV94uAgApJMxVvkRkDuyOXG2
cM0dC+GQQ0prm5id2AW2JREiET+jo2NV7uU8IQGEQq3rtG8trws45gMQFgSRYJk2
sLlAIt4QqD6qzb2H9z+JhOx1yITlsuwrvr+BAwVtK7gw6l4LxKAs35SwWpz/Z5/2
R63bLyontVzzi40Bc4pB/h3CxdOR+UjZ2a2kDIZFuI/j+9pnPoEL/Vp9XMg85ex+
g21rRwE6qv4hrGMhej5YBKQoleoieL3FQ0sXQLi5lLNYejBpU45PjdgdEwbZIBhT
4sQkzV2HRrd84OrQIJU3Jd+zHZoSq6JQUZRSGAnqC7Mvigplo24J5GRjh6T8WoaI
y495Lg==
=MR2G
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-fp-20210516' into staging

Reorg FloatParts to use QEMU_GENERIC.

Begin replacing the Berkeley float128 routines with FloatParts128.
  - includes a new implementation of float128_muladd
  - includes the snan silencing that was missing from
    float{32,64}_to_float128 and float128_to_float{32,64}.
  - does not include float128_min/max* (written but not yet reviewed).

# gpg: Signature made Sun 16 May 2021 13:27:10 BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-fp-20210516: (46 commits)
  softfloat: Move round_to_int_and_pack to softfloat-parts.c.inc
  softfloat: Move round_to_int to softfloat-parts.c.inc
  softfloat: Convert float-to-float conversions with float128
  softfloat: Split float_to_float
  softfloat: Move div_floats to softfloat-parts.c.inc
  softfloat: Introduce sh[lr]_double primitives
  softfloat: Tidy mul128By64To192
  softfloat: Use add192 in mul128To256
  softfloat: Use mulu64 for mul64To128
  softfloat: Move muladd_floats to softfloat-parts.c.inc
  softfloat: Move mul_floats to softfloat-parts.c.inc
  softfloat: Implement float128_add/sub via parts
  softfloat: Move addsub_floats to softfloat-parts.c.inc
  softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h
  softfloat: Move round_canonical to softfloat-parts.c.inc
  softfloat: Move sf_canonicalize to softfloat-parts.c.inc
  softfloat: Move pick_nan_muladd to softfloat-parts.c.inc
  softfloat: Move pick_nan to softfloat-parts.c.inc
  softfloat: Move return_nan to softfloat-parts.c.inc
  softfloat: Convert float128_default_nan to parts
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
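
As a rough sketch of the new API surface (not part of this series), a caller could exercise the float128_muladd prototype added to include/fpu/softfloat.h in this pull along these lines; the muladd flag name and float_status plumbing are assumed from the existing QEMU softfloat headers:

/*
 * Hypothetical illustration only: computes -(a * b) + c with a single
 * rounding step, via the float128_muladd entry point added by this pull.
 */
static float128 fused_negprod_example(float128 a, float128 b, float128 c,
                                      float_status *st)
{
    return float128_muladd(a, b, c, float_muladd_negate_product, st);
}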
commit 1acbc0fdf2
@ -1073,9 +1073,8 @@ void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
|
|||
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
|
||||
int32_t ai = *(int32_t *)(a + i);
|
||||
int32_t bi = *(int32_t *)(b + i);
|
||||
int32_t di = ai + bi;
|
||||
if (((di ^ ai) &~ (ai ^ bi)) < 0) {
|
||||
/* Signed overflow. */
|
||||
int32_t di;
|
||||
if (sadd32_overflow(ai, bi, &di)) {
|
||||
di = (di < 0 ? INT32_MAX : INT32_MIN);
|
||||
}
|
||||
*(int32_t *)(d + i) = di;
|
||||
|
@ -1091,9 +1090,8 @@ void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
|
|||
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
|
||||
int64_t ai = *(int64_t *)(a + i);
|
||||
int64_t bi = *(int64_t *)(b + i);
|
||||
int64_t di = ai + bi;
|
||||
if (((di ^ ai) &~ (ai ^ bi)) < 0) {
|
||||
/* Signed overflow. */
|
||||
int64_t di;
|
||||
if (sadd64_overflow(ai, bi, &di)) {
|
||||
di = (di < 0 ? INT64_MAX : INT64_MIN);
|
||||
}
|
||||
*(int64_t *)(d + i) = di;
|
||||
|
@ -1143,9 +1141,8 @@ void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
|
|||
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
|
||||
int32_t ai = *(int32_t *)(a + i);
|
||||
int32_t bi = *(int32_t *)(b + i);
|
||||
int32_t di = ai - bi;
|
||||
if (((di ^ ai) & (ai ^ bi)) < 0) {
|
||||
/* Signed overflow. */
|
||||
int32_t di;
|
||||
if (ssub32_overflow(ai, bi, &di)) {
|
||||
di = (di < 0 ? INT32_MAX : INT32_MIN);
|
||||
}
|
||||
*(int32_t *)(d + i) = di;
|
||||
|
@ -1161,9 +1158,8 @@ void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
|
|||
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
|
||||
int64_t ai = *(int64_t *)(a + i);
|
||||
int64_t bi = *(int64_t *)(b + i);
|
||||
int64_t di = ai - bi;
|
||||
if (((di ^ ai) & (ai ^ bi)) < 0) {
|
||||
/* Signed overflow. */
|
||||
int64_t di;
|
||||
if (ssub64_overflow(ai, bi, &di)) {
|
||||
di = (di < 0 ? INT64_MAX : INT64_MIN);
|
||||
}
|
||||
*(int64_t *)(d + i) = di;
|
||||
|
@ -1209,8 +1205,8 @@ void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
|
|||
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
||||
uint32_t ai = *(uint32_t *)(a + i);
|
||||
uint32_t bi = *(uint32_t *)(b + i);
|
||||
uint32_t di = ai + bi;
|
||||
if (di < ai) {
|
||||
uint32_t di;
|
||||
if (uadd32_overflow(ai, bi, &di)) {
|
||||
di = UINT32_MAX;
|
||||
}
|
||||
*(uint32_t *)(d + i) = di;
|
||||
|
@ -1226,8 +1222,8 @@ void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
|
|||
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
||||
uint64_t ai = *(uint64_t *)(a + i);
|
||||
uint64_t bi = *(uint64_t *)(b + i);
|
||||
uint64_t di = ai + bi;
|
||||
if (di < ai) {
|
||||
uint64_t di;
|
||||
if (uadd64_overflow(ai, bi, &di)) {
|
||||
di = UINT64_MAX;
|
||||
}
|
||||
*(uint64_t *)(d + i) = di;
|
||||
|
@ -1273,8 +1269,8 @@ void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
|
|||
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
||||
uint32_t ai = *(uint32_t *)(a + i);
|
||||
uint32_t bi = *(uint32_t *)(b + i);
|
||||
uint32_t di = ai - bi;
|
||||
if (ai < bi) {
|
||||
uint32_t di;
|
||||
if (usub32_overflow(ai, bi, &di)) {
|
||||
di = 0;
|
||||
}
|
||||
*(uint32_t *)(d + i) = di;
|
||||
|
@ -1290,8 +1286,8 @@ void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
|
|||
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
||||
uint64_t ai = *(uint64_t *)(a + i);
|
||||
uint64_t bi = *(uint64_t *)(b + i);
|
||||
uint64_t di = ai - bi;
|
||||
if (ai < bi) {
|
||||
uint64_t di;
|
||||
if (usub64_overflow(ai, bi, &di)) {
|
||||
di = 0;
|
||||
}
|
||||
*(uint64_t *)(d + i) = di;
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Floating point arithmetic implementation
|
||||
*
|
||||
* The code in this source file is derived from release 2a of the SoftFloat
|
||||
* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
|
||||
* some later contributions) are provided under that license, as detailed below.
|
||||
* It has subsequently been modified by contributors to the QEMU Project,
|
||||
* so some portions are provided under:
|
||||
* the SoftFloat-2a license
|
||||
* the BSD license
|
||||
* GPL-v2-or-later
|
||||
*
|
||||
* Any future contributions to this file after December 1st 2014 will be
|
||||
* taken to be licensed under the Softfloat-2a license unless specifically
|
||||
* indicated otherwise.
|
||||
*/
|
||||
|
||||
static void partsN(add_normal)(FloatPartsN *a, FloatPartsN *b)
|
||||
{
|
||||
int exp_diff = a->exp - b->exp;
|
||||
|
||||
if (exp_diff > 0) {
|
||||
frac_shrjam(b, exp_diff);
|
||||
} else if (exp_diff < 0) {
|
||||
frac_shrjam(a, -exp_diff);
|
||||
a->exp = b->exp;
|
||||
}
|
||||
|
||||
if (frac_add(a, a, b)) {
|
||||
frac_shrjam(a, 1);
|
||||
a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
|
||||
a->exp += 1;
|
||||
}
|
||||
}
|
||||
|
||||
static bool partsN(sub_normal)(FloatPartsN *a, FloatPartsN *b)
|
||||
{
|
||||
int exp_diff = a->exp - b->exp;
|
||||
int shift;
|
||||
|
||||
if (exp_diff > 0) {
|
||||
frac_shrjam(b, exp_diff);
|
||||
frac_sub(a, a, b);
|
||||
} else if (exp_diff < 0) {
|
||||
a->exp = b->exp;
|
||||
a->sign ^= 1;
|
||||
frac_shrjam(a, -exp_diff);
|
||||
frac_sub(a, b, a);
|
||||
} else if (frac_sub(a, a, b)) {
|
||||
/* Overflow means that A was less than B. */
|
||||
frac_neg(a);
|
||||
a->sign ^= 1;
|
||||
}
|
||||
|
||||
shift = frac_normalize(a);
|
||||
if (likely(shift < N)) {
|
||||
a->exp -= shift;
|
||||
return true;
|
||||
}
|
||||
a->cls = float_class_zero;
|
||||
return false;
|
||||
}
|
|
@ -0,0 +1,817 @@
|
|||
/*
|
||||
* QEMU float support
|
||||
*
|
||||
* The code in this source file is derived from release 2a of the SoftFloat
|
||||
* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
|
||||
* some later contributions) are provided under that license, as detailed below.
|
||||
* It has subsequently been modified by contributors to the QEMU Project,
|
||||
* so some portions are provided under:
|
||||
* the SoftFloat-2a license
|
||||
* the BSD license
|
||||
* GPL-v2-or-later
|
||||
*
|
||||
* Any future contributions to this file after December 1st 2014 will be
|
||||
* taken to be licensed under the Softfloat-2a license unless specifically
|
||||
* indicated otherwise.
|
||||
*/
|
||||
|
||||
static void partsN(return_nan)(FloatPartsN *a, float_status *s)
|
||||
{
|
||||
switch (a->cls) {
|
||||
case float_class_snan:
|
||||
float_raise(float_flag_invalid, s);
|
||||
if (s->default_nan_mode) {
|
||||
parts_default_nan(a, s);
|
||||
} else {
|
||||
parts_silence_nan(a, s);
|
||||
}
|
||||
break;
|
||||
case float_class_qnan:
|
||||
if (s->default_nan_mode) {
|
||||
parts_default_nan(a, s);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
||||
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
|
||||
float_status *s)
|
||||
{
|
||||
if (is_snan(a->cls) || is_snan(b->cls)) {
|
||||
float_raise(float_flag_invalid, s);
|
||||
}
|
||||
|
||||
if (s->default_nan_mode) {
|
||||
parts_default_nan(a, s);
|
||||
} else {
|
||||
int cmp = frac_cmp(a, b);
|
||||
if (cmp == 0) {
|
||||
cmp = a->sign < b->sign;
|
||||
}
|
||||
|
||||
if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
|
||||
a = b;
|
||||
}
|
||||
if (is_snan(a->cls)) {
|
||||
parts_silence_nan(a, s);
|
||||
}
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
|
||||
FloatPartsN *c, float_status *s,
|
||||
int ab_mask, int abc_mask)
|
||||
{
|
||||
int which;
|
||||
|
||||
if (unlikely(abc_mask & float_cmask_snan)) {
|
||||
float_raise(float_flag_invalid, s);
|
||||
}
|
||||
|
||||
which = pickNaNMulAdd(a->cls, b->cls, c->cls,
|
||||
ab_mask == float_cmask_infzero, s);
|
||||
|
||||
if (s->default_nan_mode || which == 3) {
|
||||
/*
|
||||
* Note that this check is after pickNaNMulAdd so that function
|
||||
* has an opportunity to set the Invalid flag for infzero.
|
||||
*/
|
||||
parts_default_nan(a, s);
|
||||
return a;
|
||||
}
|
||||
|
||||
switch (which) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
a = b;
|
||||
break;
|
||||
case 2:
|
||||
a = c;
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
if (is_snan(a->cls)) {
|
||||
parts_silence_nan(a, s);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
/*
|
||||
* Canonicalize the FloatParts structure. Determine the class,
|
||||
* unbias the exponent, and normalize the fraction.
|
||||
*/
|
||||
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
|
||||
const FloatFmt *fmt)
|
||||
{
|
||||
if (unlikely(p->exp == 0)) {
|
||||
if (likely(frac_eqz(p))) {
|
||||
p->cls = float_class_zero;
|
||||
} else if (status->flush_inputs_to_zero) {
|
||||
float_raise(float_flag_input_denormal, status);
|
||||
p->cls = float_class_zero;
|
||||
frac_clear(p);
|
||||
} else {
|
||||
int shift = frac_normalize(p);
|
||||
p->cls = float_class_normal;
|
||||
p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
|
||||
}
|
||||
} else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
|
||||
p->cls = float_class_normal;
|
||||
p->exp -= fmt->exp_bias;
|
||||
frac_shl(p, fmt->frac_shift);
|
||||
p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
|
||||
} else if (likely(frac_eqz(p))) {
|
||||
p->cls = float_class_inf;
|
||||
} else {
|
||||
frac_shl(p, fmt->frac_shift);
|
||||
p->cls = (parts_is_snan_frac(p->frac_hi, status)
|
||||
? float_class_snan : float_class_qnan);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Round and uncanonicalize a floating-point number by parts. There
|
||||
* are FRAC_SHIFT bits that may require rounding at the bottom of the
|
||||
* fraction; these bits will be removed. The exponent will be biased
|
||||
* by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
|
||||
*/
|
||||
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
|
||||
const FloatFmt *fmt)
|
||||
{
|
||||
const int exp_max = fmt->exp_max;
|
||||
const int frac_shift = fmt->frac_shift;
|
||||
const uint64_t frac_lsb = fmt->frac_lsb;
|
||||
const uint64_t frac_lsbm1 = fmt->frac_lsbm1;
|
||||
const uint64_t round_mask = fmt->round_mask;
|
||||
const uint64_t roundeven_mask = fmt->roundeven_mask;
|
||||
uint64_t inc;
|
||||
bool overflow_norm;
|
||||
int exp, flags = 0;
|
||||
|
||||
if (unlikely(p->cls != float_class_normal)) {
|
||||
switch (p->cls) {
|
||||
case float_class_zero:
|
||||
p->exp = 0;
|
||||
frac_clear(p);
|
||||
return;
|
||||
case float_class_inf:
|
||||
g_assert(!fmt->arm_althp);
|
||||
p->exp = fmt->exp_max;
|
||||
frac_clear(p);
|
||||
return;
|
||||
case float_class_qnan:
|
||||
case float_class_snan:
|
||||
g_assert(!fmt->arm_althp);
|
||||
p->exp = fmt->exp_max;
|
||||
frac_shr(p, fmt->frac_shift);
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
switch (s->float_rounding_mode) {
|
||||
case float_round_nearest_even:
|
||||
overflow_norm = false;
|
||||
inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
|
||||
break;
|
||||
case float_round_ties_away:
|
||||
overflow_norm = false;
|
||||
inc = frac_lsbm1;
|
||||
break;
|
||||
case float_round_to_zero:
|
||||
overflow_norm = true;
|
||||
inc = 0;
|
||||
break;
|
||||
case float_round_up:
|
||||
inc = p->sign ? 0 : round_mask;
|
||||
overflow_norm = p->sign;
|
||||
break;
|
||||
case float_round_down:
|
||||
inc = p->sign ? round_mask : 0;
|
||||
overflow_norm = !p->sign;
|
||||
break;
|
||||
case float_round_to_odd:
|
||||
overflow_norm = true;
|
||||
inc = p->frac_lo & frac_lsb ? 0 : round_mask;
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
exp = p->exp + fmt->exp_bias;
|
||||
if (likely(exp > 0)) {
|
||||
if (p->frac_lo & round_mask) {
|
||||
flags |= float_flag_inexact;
|
||||
if (frac_addi(p, p, inc)) {
|
||||
frac_shr(p, 1);
|
||||
p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
|
||||
exp++;
|
||||
}
|
||||
}
|
||||
frac_shr(p, frac_shift);
|
||||
|
||||
if (fmt->arm_althp) {
|
||||
/* ARM Alt HP eschews Inf and NaN for a wider exponent. */
|
||||
if (unlikely(exp > exp_max)) {
|
||||
/* Overflow. Return the maximum normal. */
|
||||
flags = float_flag_invalid;
|
||||
exp = exp_max;
|
||||
frac_allones(p);
|
||||
}
|
||||
} else if (unlikely(exp >= exp_max)) {
|
||||
flags |= float_flag_overflow | float_flag_inexact;
|
||||
if (overflow_norm) {
|
||||
exp = exp_max - 1;
|
||||
frac_allones(p);
|
||||
} else {
|
||||
p->cls = float_class_inf;
|
||||
exp = exp_max;
|
||||
frac_clear(p);
|
||||
}
|
||||
}
|
||||
} else if (s->flush_to_zero) {
|
||||
flags |= float_flag_output_denormal;
|
||||
p->cls = float_class_zero;
|
||||
exp = 0;
|
||||
frac_clear(p);
|
||||
} else {
|
||||
bool is_tiny = s->tininess_before_rounding || exp < 0;
|
||||
|
||||
if (!is_tiny) {
|
||||
FloatPartsN discard;
|
||||
is_tiny = !frac_addi(&discard, p, inc);
|
||||
}
|
||||
|
||||
frac_shrjam(p, 1 - exp);
|
||||
|
||||
if (p->frac_lo & round_mask) {
|
||||
/* Need to recompute round-to-even/round-to-odd. */
|
||||
switch (s->float_rounding_mode) {
|
||||
case float_round_nearest_even:
|
||||
inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
|
||||
? frac_lsbm1 : 0);
|
||||
break;
|
||||
case float_round_to_odd:
|
||||
inc = p->frac_lo & frac_lsb ? 0 : round_mask;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
flags |= float_flag_inexact;
|
||||
frac_addi(p, p, inc);
|
||||
}
|
||||
|
||||
exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
|
||||
frac_shr(p, frac_shift);
|
||||
|
||||
if (is_tiny && (flags & float_flag_inexact)) {
|
||||
flags |= float_flag_underflow;
|
||||
}
|
||||
if (exp == 0 && frac_eqz(p)) {
|
||||
p->cls = float_class_zero;
|
||||
}
|
||||
}
|
||||
p->exp = exp;
|
||||
float_raise(flags, s);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the result of adding or subtracting the values of the
|
||||
* floating-point values `a' and `b'. The operation is performed
|
||||
* according to the IEC/IEEE Standard for Binary Floating-Point
|
||||
* Arithmetic.
|
||||
*/
|
||||
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
|
||||
float_status *s, bool subtract)
|
||||
{
|
||||
bool b_sign = b->sign ^ subtract;
|
||||
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||
|
||||
if (a->sign != b_sign) {
|
||||
/* Subtraction */
|
||||
if (likely(ab_mask == float_cmask_normal)) {
|
||||
if (parts_sub_normal(a, b)) {
|
||||
return a;
|
||||
}
|
||||
/* Subtract was exact, fall through to set sign. */
|
||||
ab_mask = float_cmask_zero;
|
||||
}
|
||||
|
||||
if (ab_mask == float_cmask_zero) {
|
||||
a->sign = s->float_rounding_mode == float_round_down;
|
||||
return a;
|
||||
}
|
||||
|
||||
if (unlikely(ab_mask & float_cmask_anynan)) {
|
||||
goto p_nan;
|
||||
}
|
||||
|
||||
if (ab_mask & float_cmask_inf) {
|
||||
if (a->cls != float_class_inf) {
|
||||
/* N - Inf */
|
||||
goto return_b;
|
||||
}
|
||||
if (b->cls != float_class_inf) {
|
||||
/* Inf - N */
|
||||
return a;
|
||||
}
|
||||
/* Inf - Inf */
|
||||
float_raise(float_flag_invalid, s);
|
||||
parts_default_nan(a, s);
|
||||
return a;
|
||||
}
|
||||
} else {
|
||||
/* Addition */
|
||||
if (likely(ab_mask == float_cmask_normal)) {
|
||||
parts_add_normal(a, b);
|
||||
return a;
|
||||
}
|
||||
|
||||
if (ab_mask == float_cmask_zero) {
|
||||
return a;
|
||||
}
|
||||
|
||||
if (unlikely(ab_mask & float_cmask_anynan)) {
|
||||
goto p_nan;
|
||||
}
|
||||
|
||||
if (ab_mask & float_cmask_inf) {
|
||||
a->cls = float_class_inf;
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
if (b->cls == float_class_zero) {
|
||||
g_assert(a->cls == float_class_normal);
|
||||
return a;
|
||||
}
|
||||
|
||||
g_assert(a->cls == float_class_zero);
|
||||
g_assert(b->cls == float_class_normal);
|
||||
return_b:
|
||||
b->sign = b_sign;
|
||||
return b;
|
||||
|
||||
p_nan:
|
||||
return parts_pick_nan(a, b, s);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the result of multiplying the floating-point values `a' and
|
||||
* `b'. The operation is performed according to the IEC/IEEE Standard
|
||||
* for Binary Floating-Point Arithmetic.
|
||||
*/
|
||||
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
|
||||
float_status *s)
|
||||
{
|
||||
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||
bool sign = a->sign ^ b->sign;
|
||||
|
||||
if (likely(ab_mask == float_cmask_normal)) {
|
||||
FloatPartsW tmp;
|
||||
|
||||
frac_mulw(&tmp, a, b);
|
||||
frac_truncjam(a, &tmp);
|
||||
|
||||
a->exp += b->exp + 1;
|
||||
if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
|
||||
frac_add(a, a, a);
|
||||
a->exp -= 1;
|
||||
}
|
||||
|
||||
a->sign = sign;
|
||||
return a;
|
||||
}
|
||||
|
||||
/* Inf * Zero == NaN */
|
||||
if (unlikely(ab_mask == float_cmask_infzero)) {
|
||||
float_raise(float_flag_invalid, s);
|
||||
parts_default_nan(a, s);
|
||||
return a;
|
||||
}
|
||||
|
||||
if (unlikely(ab_mask & float_cmask_anynan)) {
|
||||
return parts_pick_nan(a, b, s);
|
||||
}
|
||||
|
||||
/* Multiply by 0 or Inf */
|
||||
if (ab_mask & float_cmask_inf) {
|
||||
a->cls = float_class_inf;
|
||||
a->sign = sign;
|
||||
return a;
|
||||
}
|
||||
|
||||
g_assert(ab_mask & float_cmask_zero);
|
||||
a->cls = float_class_zero;
|
||||
a->sign = sign;
|
||||
return a;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the result of multiplying the floating-point values `a' and
|
||||
* `b' then adding 'c', with no intermediate rounding step after the
|
||||
* multiplication. The operation is performed according to the
|
||||
* IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
|
||||
* The flags argument allows the caller to select negation of the
|
||||
* addend, the intermediate product, or the final result. (The
|
||||
* difference between this and having the caller do a separate
|
||||
* negation is that negating externally will flip the sign bit on NaNs.)
|
||||
*
|
||||
* Requires A and C extracted into a double-sized structure to provide the
|
||||
* extra space for the widening multiply.
|
||||
*/
|
||||
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
|
||||
FloatPartsN *c, int flags, float_status *s)
|
||||
{
|
||||
int ab_mask, abc_mask;
|
||||
FloatPartsW p_widen, c_widen;
|
||||
|
||||
ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||
abc_mask = float_cmask(c->cls) | ab_mask;
|
||||
|
||||
/*
|
||||
* It is implementation-defined whether the cases of (0,inf,qnan)
|
||||
* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
|
||||
* they return if they do), so we have to hand this information
|
||||
* off to the target-specific pick-a-NaN routine.
|
||||
*/
|
||||
if (unlikely(abc_mask & float_cmask_anynan)) {
|
||||
return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
|
||||
}
|
||||
|
||||
if (flags & float_muladd_negate_c) {
|
||||
c->sign ^= 1;
|
||||
}
|
||||
|
||||
/* Compute the sign of the product into A. */
|
||||
a->sign ^= b->sign;
|
||||
if (flags & float_muladd_negate_product) {
|
||||
a->sign ^= 1;
|
||||
}
|
||||
|
||||
if (unlikely(ab_mask != float_cmask_normal)) {
|
||||
if (unlikely(ab_mask == float_cmask_infzero)) {
|
||||
goto d_nan;
|
||||
}
|
||||
|
||||
if (ab_mask & float_cmask_inf) {
|
||||
if (c->cls == float_class_inf && a->sign != c->sign) {
|
||||
goto d_nan;
|
||||
}
|
||||
goto return_inf;
|
||||
}
|
||||
|
||||
g_assert(ab_mask & float_cmask_zero);
|
||||
if (c->cls == float_class_normal) {
|
||||
*a = *c;
|
||||
goto return_normal;
|
||||
}
|
||||
if (c->cls == float_class_zero) {
|
||||
if (a->sign != c->sign) {
|
||||
goto return_sub_zero;
|
||||
}
|
||||
goto return_zero;
|
||||
}
|
||||
g_assert(c->cls == float_class_inf);
|
||||
}
|
||||
|
||||
if (unlikely(c->cls == float_class_inf)) {
|
||||
a->sign = c->sign;
|
||||
goto return_inf;
|
||||
}
|
||||
|
||||
/* Perform the multiplication step. */
|
||||
p_widen.sign = a->sign;
|
||||
p_widen.exp = a->exp + b->exp + 1;
|
||||
frac_mulw(&p_widen, a, b);
|
||||
if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
|
||||
frac_add(&p_widen, &p_widen, &p_widen);
|
||||
p_widen.exp -= 1;
|
||||
}
|
||||
|
||||
/* Perform the addition step. */
|
||||
if (c->cls != float_class_zero) {
|
||||
/* Zero-extend C to less significant bits. */
|
||||
frac_widen(&c_widen, c);
|
||||
c_widen.exp = c->exp;
|
||||
|
||||
if (a->sign == c->sign) {
|
||||
parts_add_normal(&p_widen, &c_widen);
|
||||
} else if (!parts_sub_normal(&p_widen, &c_widen)) {
|
||||
goto return_sub_zero;
|
||||
}
|
||||
}
|
||||
|
||||
/* Narrow with sticky bit, for proper rounding later. */
|
||||
frac_truncjam(a, &p_widen);
|
||||
a->sign = p_widen.sign;
|
||||
a->exp = p_widen.exp;
|
||||
|
||||
return_normal:
|
||||
if (flags & float_muladd_halve_result) {
|
||||
a->exp -= 1;
|
||||
}
|
||||
finish_sign:
|
||||
if (flags & float_muladd_negate_result) {
|
||||
a->sign ^= 1;
|
||||
}
|
||||
return a;
|
||||
|
||||
return_sub_zero:
|
||||
a->sign = s->float_rounding_mode == float_round_down;
|
||||
return_zero:
|
||||
a->cls = float_class_zero;
|
||||
goto finish_sign;
|
||||
|
||||
return_inf:
|
||||
a->cls = float_class_inf;
|
||||
goto finish_sign;
|
||||
|
||||
d_nan:
|
||||
float_raise(float_flag_invalid, s);
|
||||
parts_default_nan(a, s);
|
||||
return a;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the result of dividing the floating-point value `a' by the
|
||||
* corresponding value `b'. The operation is performed according to
|
||||
* the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||
*/
|
||||
static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
|
||||
float_status *s)
|
||||
{
|
||||
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||
bool sign = a->sign ^ b->sign;
|
||||
|
||||
if (likely(ab_mask == float_cmask_normal)) {
|
||||
a->sign = sign;
|
||||
a->exp -= b->exp + frac_div(a, b);
|
||||
return a;
|
||||
}
|
||||
|
||||
/* 0/0 or Inf/Inf => NaN */
|
||||
if (unlikely(ab_mask == float_cmask_zero) ||
|
||||
unlikely(ab_mask == float_cmask_inf)) {
|
||||
float_raise(float_flag_invalid, s);
|
||||
parts_default_nan(a, s);
|
||||
return a;
|
||||
}
|
||||
|
||||
/* All the NaN cases */
|
||||
if (unlikely(ab_mask & float_cmask_anynan)) {
|
||||
return parts_pick_nan(a, b, s);
|
||||
}
|
||||
|
||||
a->sign = sign;
|
||||
|
||||
/* Inf / X */
|
||||
if (a->cls == float_class_inf) {
|
||||
return a;
|
||||
}
|
||||
|
||||
/* 0 / X */
|
||||
if (a->cls == float_class_zero) {
|
||||
return a;
|
||||
}
|
||||
|
||||
/* X / Inf */
|
||||
if (b->cls == float_class_inf) {
|
||||
a->cls = float_class_zero;
|
||||
return a;
|
||||
}
|
||||
|
||||
/* X / 0 => Inf */
|
||||
g_assert(b->cls == float_class_zero);
|
||||
float_raise(float_flag_divbyzero, s);
|
||||
a->cls = float_class_inf;
|
||||
return a;
|
||||
}
|
||||
|
||||
/*
|
||||
* Rounds the floating-point value `a' to an integer, and returns the
|
||||
* result as a floating-point value. The operation is performed
|
||||
* according to the IEC/IEEE Standard for Binary Floating-Point
|
||||
* Arithmetic.
|
||||
*
|
||||
* parts_round_to_int_normal is an internal helper function for
|
||||
* normal numbers only, returning true for inexact but not directly
|
||||
* raising float_flag_inexact.
|
||||
*/
|
||||
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
|
||||
int scale, int frac_size)
|
||||
{
|
||||
uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
|
||||
int shift_adj;
|
||||
|
||||
scale = MIN(MAX(scale, -0x10000), 0x10000);
|
||||
a->exp += scale;
|
||||
|
||||
if (a->exp < 0) {
|
||||
bool one;
|
||||
|
||||
/* All fractional */
|
||||
switch (rmode) {
|
||||
case float_round_nearest_even:
|
||||
one = false;
|
||||
if (a->exp == -1) {
|
||||
FloatPartsN tmp;
|
||||
/* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
|
||||
frac_add(&tmp, a, a);
|
||||
/* Anything remaining means frac > 0.5. */
|
||||
one = !frac_eqz(&tmp);
|
||||
}
|
||||
break;
|
||||
case float_round_ties_away:
|
||||
one = a->exp == -1;
|
||||
break;
|
||||
case float_round_to_zero:
|
||||
one = false;
|
||||
break;
|
||||
case float_round_up:
|
||||
one = !a->sign;
|
||||
break;
|
||||
case float_round_down:
|
||||
one = a->sign;
|
||||
break;
|
||||
case float_round_to_odd:
|
||||
one = true;
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
frac_clear(a);
|
||||
a->exp = 0;
|
||||
if (one) {
|
||||
a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
|
||||
} else {
|
||||
a->cls = float_class_zero;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (a->exp >= frac_size) {
|
||||
/* All integral */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (N > 64 && a->exp < N - 64) {
|
||||
/*
|
||||
* Rounding is not in the low word -- shift lsb to bit 2,
|
||||
* which leaves room for sticky and rounding bit.
|
||||
*/
|
||||
shift_adj = (N - 1) - (a->exp + 2);
|
||||
frac_shrjam(a, shift_adj);
|
||||
frac_lsb = 1 << 2;
|
||||
} else {
|
||||
shift_adj = 0;
|
||||
frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
|
||||
}
|
||||
|
||||
frac_lsbm1 = frac_lsb >> 1;
|
||||
rnd_mask = frac_lsb - 1;
|
||||
rnd_even_mask = rnd_mask | frac_lsb;
|
||||
|
||||
if (!(a->frac_lo & rnd_mask)) {
|
||||
/* Fractional bits already clear, undo the shift above. */
|
||||
frac_shl(a, shift_adj);
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (rmode) {
|
||||
case float_round_nearest_even:
|
||||
inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
|
||||
break;
|
||||
case float_round_ties_away:
|
||||
inc = frac_lsbm1;
|
||||
break;
|
||||
case float_round_to_zero:
|
||||
inc = 0;
|
||||
break;
|
||||
case float_round_up:
|
||||
inc = a->sign ? 0 : rnd_mask;
|
||||
break;
|
||||
case float_round_down:
|
||||
inc = a->sign ? rnd_mask : 0;
|
||||
break;
|
||||
case float_round_to_odd:
|
||||
inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
if (shift_adj == 0) {
|
||||
if (frac_addi(a, a, inc)) {
|
||||
frac_shr(a, 1);
|
||||
a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
|
||||
a->exp++;
|
||||
}
|
||||
a->frac_lo &= ~rnd_mask;
|
||||
} else {
|
||||
frac_addi(a, a, inc);
|
||||
a->frac_lo &= ~rnd_mask;
|
||||
/* Be careful shifting back, not to overflow */
|
||||
frac_shl(a, shift_adj - 1);
|
||||
if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
|
||||
a->exp++;
|
||||
} else {
|
||||
frac_add(a, a, a);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
|
||||
int scale, float_status *s,
|
||||
const FloatFmt *fmt)
|
||||
{
|
||||
switch (a->cls) {
|
||||
case float_class_qnan:
|
||||
case float_class_snan:
|
||||
parts_return_nan(a, s);
|
||||
break;
|
||||
case float_class_zero:
|
||||
case float_class_inf:
|
||||
break;
|
||||
case float_class_normal:
|
||||
if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
|
||||
float_raise(float_flag_inexact, s);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the result of converting the floating-point value `a' to
|
||||
* the two's complement integer format. The conversion is performed
|
||||
* according to the IEC/IEEE Standard for Binary Floating-Point
|
||||
* Arithmetic---which means in particular that the conversion is
|
||||
* rounded according to the current rounding mode. If `a' is a NaN,
|
||||
* the largest positive integer is returned. Otherwise, if the
|
||||
* conversion overflows, the largest integer with the same sign as `a'
|
||||
* is returned.
|
||||
*/
|
||||
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
|
||||
int scale, int64_t min, int64_t max,
|
||||
float_status *s)
|
||||
{
|
||||
int flags = 0;
|
||||
uint64_t r;
|
||||
|
||||
switch (p->cls) {
|
||||
case float_class_snan:
|
||||
case float_class_qnan:
|
||||
flags = float_flag_invalid;
|
||||
r = max;
|
||||
break;
|
||||
|
||||
case float_class_inf:
|
||||
flags = float_flag_invalid;
|
||||
r = p->sign ? min : max;
|
||||
break;
|
||||
|
||||
case float_class_zero:
|
||||
return 0;
|
||||
|
||||
case float_class_normal:
|
||||
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
|
||||
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
|
||||
flags = float_flag_inexact;
|
||||
}
|
||||
|
||||
if (p->exp <= DECOMPOSED_BINARY_POINT) {
|
||||
r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
|
||||
} else {
|
||||
r = UINT64_MAX;
|
||||
}
|
||||
if (p->sign) {
|
||||
if (r <= -(uint64_t)min) {
|
||||
r = -r;
|
||||
} else {
|
||||
flags = float_flag_invalid;
|
||||
r = min;
|
||||
}
|
||||
} else if (r > max) {
|
||||
flags = float_flag_invalid;
|
||||
r = max;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
float_raise(flags, s);
|
||||
return r;
|
||||
}
|
|
@ -129,7 +129,7 @@ static bool parts_is_snan_frac(uint64_t frac, float_status *status)
|
|||
| The pattern for a default generated deconstructed floating-point NaN.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static FloatParts parts_default_nan(float_status *status)
|
||||
static void parts64_default_nan(FloatParts64 *p, float_status *status)
|
||||
{
|
||||
bool sign = 0;
|
||||
uint64_t frac;
|
||||
|
@ -163,7 +163,7 @@ static FloatParts parts_default_nan(float_status *status)
|
|||
}
|
||||
#endif
|
||||
|
||||
return (FloatParts) {
|
||||
*p = (FloatParts64) {
|
||||
.cls = float_class_qnan,
|
||||
.sign = sign,
|
||||
.exp = INT_MAX,
|
||||
|
@ -171,26 +171,55 @@ static FloatParts parts_default_nan(float_status *status)
|
|||
};
|
||||
}
|
||||
|
||||
static void parts128_default_nan(FloatParts128 *p, float_status *status)
|
||||
{
|
||||
/*
|
||||
* Extrapolate from the choices made by parts64_default_nan to fill
|
||||
* in the quad-floating format. If the low bit is set, assume we
|
||||
* want to set all non-snan bits.
|
||||
*/
|
||||
FloatParts64 p64;
|
||||
parts64_default_nan(&p64, status);
|
||||
|
||||
*p = (FloatParts128) {
|
||||
.cls = float_class_qnan,
|
||||
.sign = p64.sign,
|
||||
.exp = INT_MAX,
|
||||
.frac_hi = p64.frac,
|
||||
.frac_lo = -(p64.frac & 1)
|
||||
};
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns a quiet NaN from a signalling NaN for the deconstructed
|
||||
| floating-point parts.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static FloatParts parts_silence_nan(FloatParts a, float_status *status)
|
||||
static uint64_t parts_silence_nan_frac(uint64_t frac, float_status *status)
|
||||
{
|
||||
g_assert(!no_signaling_nans(status));
|
||||
#if defined(TARGET_HPPA)
|
||||
a.frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1));
|
||||
a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2);
|
||||
#else
|
||||
g_assert(!status->default_nan_mode);
|
||||
|
||||
/* The only snan_bit_is_one target without default_nan_mode is HPPA. */
|
||||
if (snan_bit_is_one(status)) {
|
||||
return parts_default_nan(status);
|
||||
frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1));
|
||||
frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2);
|
||||
} else {
|
||||
a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1);
|
||||
frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1);
|
||||
}
|
||||
#endif
|
||||
a.cls = float_class_qnan;
|
||||
return a;
|
||||
return frac;
|
||||
}
|
||||
|
||||
static void parts64_silence_nan(FloatParts64 *p, float_status *status)
|
||||
{
|
||||
p->frac = parts_silence_nan_frac(p->frac, status);
|
||||
p->cls = float_class_qnan;
|
||||
}
|
||||
|
||||
static void parts128_silence_nan(FloatParts128 *p, float_status *status)
|
||||
{
|
||||
p->frac_hi = parts_silence_nan_frac(p->frac_hi, status);
|
||||
p->cls = float_class_qnan;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
|
@ -227,18 +256,6 @@ floatx80 floatx80_default_nan(float_status *status)
|
|||
const floatx80 floatx80_infinity
|
||||
= make_floatx80_init(floatx80_infinity_high, floatx80_infinity_low);
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Raises the exceptions specified by `flags'. Floating-point traps can be
|
||||
| defined here if desired. It is currently not possible for such a trap
|
||||
| to substitute a result value. If traps are not implemented, this routine
|
||||
| should be simply `float_exception_flags |= flags;'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
void float_raise(uint8_t flags, float_status *status)
|
||||
{
|
||||
status->float_exception_flags |= flags;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Internal canonical NaN format.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
@ -1070,25 +1087,6 @@ bool float128_is_signaling_nan(float128 a, float_status *status)
|
|||
}
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns a quiet NaN from a signalling NaN for the quadruple-precision
|
||||
| floating point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
float128 float128_silence_nan(float128 a, float_status *status)
|
||||
{
|
||||
if (no_signaling_nans(status)) {
|
||||
g_assert_not_reached();
|
||||
} else {
|
||||
if (snan_bit_is_one(status)) {
|
||||
return float128_default_nan(status);
|
||||
} else {
|
||||
a.high |= UINT64_C(0x0000800000000000);
|
||||
return a;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the result of converting the quadruple-precision floating-point NaN
|
||||
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
|
||||
|
|
fpu/softfloat.c: 3701 changes (file diff suppressed because it is too large)
|
@ -83,6 +83,43 @@ this code that are retained.
|
|||
#define FPU_SOFTFLOAT_MACROS_H
|
||||
|
||||
#include "fpu/softfloat-types.h"
|
||||
#include "qemu/host-utils.h"
|
||||
|
||||
/**
|
||||
* shl_double: double-word merging left shift
|
||||
* @l: left or most-significant word
|
||||
* @r: right or least-significant word
|
||||
* @c: shift count
|
||||
*
|
||||
* Shift @l left by @c bits, shifting in bits from @r.
|
||||
*/
|
||||
static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
|
||||
return l;
|
||||
#else
|
||||
return c ? (l << c) | (r >> (64 - c)) : l;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* shr_double: double-word merging right shift
|
||||
* @l: left or most-significant word
|
||||
* @r: right or least-significant word
|
||||
* @c: shift count
|
||||
*
|
||||
* Shift @r right by @c bits, shifting in bits from @l.
|
||||
*/
|
||||
static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
|
||||
return r;
|
||||
#else
|
||||
return c ? (r >> c) | (l << (64 - c)) : r;
|
||||
#endif
|
||||
}
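
/*
 * Illustrative check of the sh[lr]_double semantics above; this demo is
 * not part of the patch and assumes only the two helpers just defined.
 */
static void shl_double_demo(void)
{
    uint64_t l = 0x0123456789abcdefULL;
    uint64_t r = 0xfedcba9876543210ULL;

    /* Shifting left by 4 pulls the top nibble of r into the bottom of l. */
    g_assert(shl_double(l, r, 4) == 0x123456789abcdeffULL);
    /* A count of 0 must return l unchanged; no bits from r shift in. */
    g_assert(shl_double(l, r, 0) == l);
    /* shr_double mirrors this for right shifts of the low word. */
    g_assert(shr_double(l, r, 4) == 0xffedcba987654321ULL);
}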
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Shifts `a' right by the number of bits given in `count'. If any nonzero
|
||||
|
@ -403,16 +440,12 @@ static inline void
|
|||
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline void
|
||||
add128(
|
||||
uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
|
||||
static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
|
||||
uint64_t *z0Ptr, uint64_t *z1Ptr)
|
||||
{
|
||||
uint64_t z1;
|
||||
|
||||
z1 = a1 + b1;
|
||||
*z1Ptr = z1;
|
||||
*z0Ptr = a0 + b0 + ( z1 < a1 );
|
||||
|
||||
bool c = 0;
|
||||
*z1Ptr = uadd64_carry(a1, b1, &c);
|
||||
*z0Ptr = uadd64_carry(a0, b0, &c);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
|
@ -423,34 +456,14 @@ static inline void
|
|||
| `z1Ptr', and `z2Ptr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline void
|
||||
add192(
|
||||
uint64_t a0,
|
||||
uint64_t a1,
|
||||
uint64_t a2,
|
||||
uint64_t b0,
|
||||
uint64_t b1,
|
||||
uint64_t b2,
|
||||
uint64_t *z0Ptr,
|
||||
uint64_t *z1Ptr,
|
||||
uint64_t *z2Ptr
|
||||
)
|
||||
static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
|
||||
uint64_t b0, uint64_t b1, uint64_t b2,
|
||||
uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
|
||||
{
|
||||
uint64_t z0, z1, z2;
|
||||
int8_t carry0, carry1;
|
||||
|
||||
z2 = a2 + b2;
|
||||
carry1 = ( z2 < a2 );
|
||||
z1 = a1 + b1;
|
||||
carry0 = ( z1 < a1 );
|
||||
z0 = a0 + b0;
|
||||
z1 += carry1;
|
||||
z0 += ( z1 < carry1 );
|
||||
z0 += carry0;
|
||||
*z2Ptr = z2;
|
||||
*z1Ptr = z1;
|
||||
*z0Ptr = z0;
|
||||
|
||||
bool c = 0;
|
||||
*z2Ptr = uadd64_carry(a2, b2, &c);
|
||||
*z1Ptr = uadd64_carry(a1, b1, &c);
|
||||
*z0Ptr = uadd64_carry(a0, b0, &c);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
|
@ -461,14 +474,12 @@ static inline void
|
|||
| `z1Ptr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline void
|
||||
sub128(
|
||||
uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
|
||||
static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
|
||||
uint64_t *z0Ptr, uint64_t *z1Ptr)
|
||||
{
|
||||
|
||||
*z1Ptr = a1 - b1;
|
||||
*z0Ptr = a0 - b0 - ( a1 < b1 );
|
||||
|
||||
bool c = 0;
|
||||
*z1Ptr = usub64_borrow(a1, b1, &c);
|
||||
*z0Ptr = usub64_borrow(a0, b0, &c);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
|
@ -479,34 +490,14 @@ static inline void
|
|||
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline void
|
||||
sub192(
|
||||
uint64_t a0,
|
||||
uint64_t a1,
|
||||
uint64_t a2,
|
||||
uint64_t b0,
|
||||
uint64_t b1,
|
||||
uint64_t b2,
|
||||
uint64_t *z0Ptr,
|
||||
uint64_t *z1Ptr,
|
||||
uint64_t *z2Ptr
|
||||
)
|
||||
static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
|
||||
uint64_t b0, uint64_t b1, uint64_t b2,
|
||||
uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
|
||||
{
|
||||
uint64_t z0, z1, z2;
|
||||
int8_t borrow0, borrow1;
|
||||
|
||||
z2 = a2 - b2;
|
||||
borrow1 = ( a2 < b2 );
|
||||
z1 = a1 - b1;
|
||||
borrow0 = ( a1 < b1 );
|
||||
z0 = a0 - b0;
|
||||
z0 -= ( z1 < borrow1 );
|
||||
z1 -= borrow1;
|
||||
z0 -= borrow0;
|
||||
*z2Ptr = z2;
|
||||
*z1Ptr = z1;
|
||||
*z0Ptr = z0;
|
||||
|
||||
bool c = 0;
|
||||
*z2Ptr = usub64_borrow(a2, b2, &c);
|
||||
*z1Ptr = usub64_borrow(a1, b1, &c);
|
||||
*z0Ptr = usub64_borrow(a0, b0, &c);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
|
@ -515,27 +506,10 @@ static inline void
|
|||
| `z0Ptr' and `z1Ptr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
|
||||
static inline void
|
||||
mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
|
||||
{
|
||||
uint32_t aHigh, aLow, bHigh, bLow;
|
||||
uint64_t z0, zMiddleA, zMiddleB, z1;
|
||||
|
||||
aLow = a;
|
||||
aHigh = a>>32;
|
||||
bLow = b;
|
||||
bHigh = b>>32;
|
||||
z1 = ( (uint64_t) aLow ) * bLow;
|
||||
zMiddleA = ( (uint64_t) aLow ) * bHigh;
|
||||
zMiddleB = ( (uint64_t) aHigh ) * bLow;
|
||||
z0 = ( (uint64_t) aHigh ) * bHigh;
|
||||
zMiddleA += zMiddleB;
|
||||
z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
|
||||
zMiddleA <<= 32;
|
||||
z1 += zMiddleA;
|
||||
z0 += ( z1 < zMiddleA );
|
||||
*z1Ptr = z1;
|
||||
*z0Ptr = z0;
|
||||
|
||||
mulu64(z1Ptr, z0Ptr, a, b);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
|
@ -546,24 +520,14 @@ static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t
|
|||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline void
|
||||
mul128By64To192(
|
||||
uint64_t a0,
|
||||
uint64_t a1,
|
||||
uint64_t b,
|
||||
uint64_t *z0Ptr,
|
||||
uint64_t *z1Ptr,
|
||||
uint64_t *z2Ptr
|
||||
)
|
||||
mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
|
||||
uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
|
||||
{
|
||||
uint64_t z0, z1, z2, more1;
|
||||
|
||||
mul64To128( a1, b, &z1, &z2 );
|
||||
mul64To128( a0, b, &z0, &more1 );
|
||||
add128( z0, more1, 0, z1, &z0, &z1 );
|
||||
*z2Ptr = z2;
|
||||
*z1Ptr = z1;
|
||||
*z0Ptr = z0;
|
||||
uint64_t z0, z1, m1;
|
||||
|
||||
mul64To128(a1, b, &m1, z2Ptr);
|
||||
mul64To128(a0, b, &z0, &z1);
|
||||
add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
|
@ -573,34 +537,21 @@ static inline void
|
|||
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline void
|
||||
mul128To256(
|
||||
uint64_t a0,
|
||||
uint64_t a1,
|
||||
uint64_t b0,
|
||||
uint64_t b1,
|
||||
uint64_t *z0Ptr,
|
||||
uint64_t *z1Ptr,
|
||||
uint64_t *z2Ptr,
|
||||
uint64_t *z3Ptr
|
||||
)
|
||||
static inline void mul128To256(uint64_t a0, uint64_t a1,
|
||||
uint64_t b0, uint64_t b1,
|
||||
uint64_t *z0Ptr, uint64_t *z1Ptr,
|
||||
uint64_t *z2Ptr, uint64_t *z3Ptr)
|
||||
{
|
||||
uint64_t z0, z1, z2, z3;
|
||||
uint64_t more1, more2;
|
||||
uint64_t z0, z1, z2;
|
||||
uint64_t m0, m1, m2, n1, n2;
|
||||
|
||||
mul64To128( a1, b1, &z2, &z3 );
|
||||
mul64To128( a1, b0, &z1, &more2 );
|
||||
add128( z1, more2, 0, z2, &z1, &z2 );
|
||||
mul64To128( a0, b0, &z0, &more1 );
|
||||
add128( z0, more1, 0, z1, &z0, &z1 );
|
||||
mul64To128( a0, b1, &more1, &more2 );
|
||||
add128( more1, more2, 0, z2, &more1, &z2 );
|
||||
add128( z0, z1, 0, more1, &z0, &z1 );
|
||||
*z3Ptr = z3;
|
||||
*z2Ptr = z2;
|
||||
*z1Ptr = z1;
|
||||
*z0Ptr = z0;
|
||||
mul64To128(a1, b0, &m1, &m2);
|
||||
mul64To128(a0, b1, &n1, &n2);
|
||||
mul64To128(a1, b1, &z2, z3Ptr);
|
||||
mul64To128(a0, b0, &z0, &z1);
|
||||
|
||||
add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2);
|
||||
add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
|
|
|
@ -100,7 +100,10 @@ typedef enum {
|
|||
| Routine to raise any or all of the software IEC/IEEE floating-point
|
||||
| exception flags.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void float_raise(uint8_t flags, float_status *status);
|
||||
static inline void float_raise(uint8_t flags, float_status *status)
|
||||
{
|
||||
status->float_exception_flags |= flags;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| If `a' is denormal and we are in flush-to-zero mode then set the
|
||||
|
@ -1194,6 +1197,8 @@ float128 float128_round_to_int(float128, float_status *status);
|
|||
float128 float128_add(float128, float128, float_status *status);
|
||||
float128 float128_sub(float128, float128, float_status *status);
|
||||
float128 float128_mul(float128, float128, float_status *status);
|
||||
float128 float128_muladd(float128, float128, float128, int,
|
||||
float_status *status);
|
||||
float128 float128_div(float128, float128, float_status *status);
|
||||
float128 float128_rem(float128, float128, float_status *status);
|
||||
float128 float128_sqrt(float128, float_status *status);
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#ifndef HOST_UTILS_H
|
||||
#define HOST_UTILS_H
|
||||
|
||||
#include "qemu/compiler.h"
|
||||
#include "qemu/bswap.h"
|
||||
|
||||
#ifdef CONFIG_INT128
|
||||
|
@ -272,6 +273,9 @@ static inline int ctpop64(uint64_t val)
|
|||
*/
|
||||
static inline uint8_t revbit8(uint8_t x)
|
||||
{
|
||||
#if __has_builtin(__builtin_bitreverse8)
|
||||
return __builtin_bitreverse8(x);
|
||||
#else
|
||||
/* Assign the correct nibble position. */
|
||||
x = ((x & 0xf0) >> 4)
|
||||
| ((x & 0x0f) << 4);
|
||||
|
@ -281,6 +285,7 @@ static inline uint8_t revbit8(uint8_t x)
|
|||
| ((x & 0x22) << 1)
|
||||
| ((x & 0x11) << 3);
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -289,6 +294,9 @@ static inline uint8_t revbit8(uint8_t x)
|
|||
*/
|
||||
static inline uint16_t revbit16(uint16_t x)
|
||||
{
|
||||
#if __has_builtin(__builtin_bitreverse16)
|
||||
return __builtin_bitreverse16(x);
|
||||
#else
|
||||
/* Assign the correct byte position. */
|
||||
x = bswap16(x);
|
||||
/* Assign the correct nibble position. */
|
||||
|
@ -300,6 +308,7 @@ static inline uint16_t revbit16(uint16_t x)
|
|||
| ((x & 0x2222) << 1)
|
||||
| ((x & 0x1111) << 3);
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -308,6 +317,9 @@ static inline uint16_t revbit16(uint16_t x)
|
|||
*/
|
||||
static inline uint32_t revbit32(uint32_t x)
|
||||
{
|
||||
#if __has_builtin(__builtin_bitreverse32)
|
||||
return __builtin_bitreverse32(x);
|
||||
#else
|
||||
/* Assign the correct byte position. */
|
||||
x = bswap32(x);
|
||||
/* Assign the correct nibble position. */
|
||||
|
@ -319,6 +331,7 @@ static inline uint32_t revbit32(uint32_t x)
|
|||
| ((x & 0x22222222u) << 1)
|
||||
| ((x & 0x11111111u) << 3);
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -327,6 +340,9 @@ static inline uint32_t revbit32(uint32_t x)
|
|||
*/
|
||||
static inline uint64_t revbit64(uint64_t x)
|
||||
{
|
||||
#if __has_builtin(__builtin_bitreverse64)
|
||||
return __builtin_bitreverse64(x);
|
||||
#else
|
||||
/* Assign the correct byte position. */
|
||||
x = bswap64(x);
|
||||
/* Assign the correct nibble position. */
|
||||
|
@ -338,6 +354,281 @@ static inline uint64_t revbit64(uint64_t x)
|
|||
| ((x & 0x2222222222222222ull) << 1)
|
||||
| ((x & 0x1111111111111111ull) << 3);
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* sadd32_overflow - addition with overflow indication
|
||||
* @x, @y: addends
|
||||
* @ret: Output for sum
|
||||
*
|
||||
* Computes *@ret = @x + @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
|
||||
return __builtin_add_overflow(x, y, ret);
|
||||
#else
|
||||
*ret = x + y;
|
||||
return ((*ret ^ x) & ~(x ^ y)) < 0;
|
||||
#endif
|
||||
}
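
/*
 * Not part of the patch: a quick sanity check of the overflow predicate.
 * Signed overflow of x + y occurs exactly when x and y share a sign and
 * the truncated result's sign differs, which is what the fallback tests.
 */
static void sadd32_overflow_demo(void)
{
    int32_t r;

    g_assert(sadd32_overflow(INT32_MAX, 1, &r));    /* wraps to INT32_MIN */
    g_assert(!sadd32_overflow(INT32_MAX, -1, &r));  /* INT32_MAX - 1, no overflow */
    g_assert(sadd32_overflow(INT32_MIN, -1, &r));   /* wraps to INT32_MAX */
}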
|
||||
|
||||
/**
|
||||
* sadd64_overflow - addition with overflow indication
|
||||
* @x, @y: addends
|
||||
* @ret: Output for sum
|
||||
*
|
||||
* Computes *@ret = @x + @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
|
||||
return __builtin_add_overflow(x, y, ret);
|
||||
#else
|
||||
*ret = x + y;
|
||||
return ((*ret ^ x) & ~(x ^ y)) < 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* uadd32_overflow - addition with overflow indication
|
||||
* @x, @y: addends
|
||||
* @ret: Output for sum
|
||||
*
|
||||
* Computes *@ret = @x + @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
|
||||
return __builtin_add_overflow(x, y, ret);
|
||||
#else
|
||||
*ret = x + y;
|
||||
return *ret < x;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* uadd64_overflow - addition with overflow indication
|
||||
* @x, @y: addends
|
||||
* @ret: Output for sum
|
||||
*
|
||||
* Computes *@ret = @x + @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
|
||||
return __builtin_add_overflow(x, y, ret);
|
||||
#else
|
||||
*ret = x + y;
|
||||
return *ret < x;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* ssub32_overflow - subtraction with overflow indication
|
||||
* @x: Minuend
|
||||
* @y: Subtrahend
|
||||
* @ret: Output for difference
|
||||
*
|
||||
* Computes *@ret = @x - @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
|
||||
return __builtin_sub_overflow(x, y, ret);
|
||||
#else
|
||||
*ret = x - y;
|
||||
return ((*ret ^ x) & (x ^ y)) < 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* ssub64_overflow - subtraction with overflow indication
|
||||
* @x: Minuend
|
||||
* @y: Subtrahend
|
||||
* @ret: Output for sum
|
||||
*
|
||||
* Computes *@ret = @x - @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
|
||||
return __builtin_sub_overflow(x, y, ret);
|
||||
#else
|
||||
*ret = x - y;
|
||||
return ((*ret ^ x) & (x ^ y)) < 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* usub32_overflow - subtraction with overflow indication
|
||||
* @x: Minuend
|
||||
* @y: Subtrahend
|
||||
* @ret: Output for sum
|
||||
*
|
||||
* Computes *@ret = @x - @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
|
||||
return __builtin_sub_overflow(x, y, ret);
|
||||
#else
|
||||
*ret = x - y;
|
||||
return x < y;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* usub64_overflow - subtraction with overflow indication
|
||||
* @x: Minuend
|
||||
* @y: Subtrahend
|
||||
* @ret: Output for sum
|
||||
*
|
||||
* Computes *@ret = @x - @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
|
||||
return __builtin_sub_overflow(x, y, ret);
|
||||
#else
|
||||
*ret = x - y;
|
||||
return x < y;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* smul32_overflow - multiplication with overflow indication
|
||||
* @x, @y: Input multipliers
|
||||
* @ret: Output for product
|
||||
*
|
||||
* Computes *@ret = @x * @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
|
||||
return __builtin_mul_overflow(x, y, ret);
|
||||
#else
|
||||
int64_t z = (int64_t)x * y;
|
||||
*ret = z;
|
||||
return *ret != z;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* smul64_overflow - multiplication with overflow indication
|
||||
* @x, @y: Input multipliers
|
||||
* @ret: Output for product
|
||||
*
|
||||
* Computes *@ret = @x * @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
|
||||
return __builtin_mul_overflow(x, y, ret);
|
||||
#else
|
||||
uint64_t hi, lo;
|
||||
muls64(&lo, &hi, x, y);
|
||||
*ret = lo;
|
||||
return hi != ((int64_t)lo >> 63);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* umul32_overflow - multiplication with overflow indication
|
||||
* @x, @y: Input multipliers
|
||||
* @ret: Output for product
|
||||
*
|
||||
* Computes *@ret = @x * @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
|
||||
return __builtin_mul_overflow(x, y, ret);
|
||||
#else
|
||||
uint64_t z = (uint64_t)x * y;
|
||||
*ret = z;
|
||||
return z > UINT32_MAX;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* umul64_overflow - multiplication with overflow indication
|
||||
* @x, @y: Input multipliers
|
||||
* @ret: Output for product
|
||||
*
|
||||
* Computes *@ret = @x * @y, and returns true if and only if that
|
||||
* value has been truncated.
|
||||
*/
|
||||
static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
|
||||
{
|
||||
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
|
||||
return __builtin_mul_overflow(x, y, ret);
|
||||
#else
|
||||
uint64_t hi;
|
||||
mulu64(ret, &hi, x, y);
|
||||
return hi != 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * uadd64_carry - addition with carry-in and carry-out
 * @x, @y: addends
 * @pcarry: in-out carry value
 *
 * Computes @x + @y + *@pcarry, placing the carry-out back
 * into *@pcarry and returning the 64-bit sum.
 */
static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
#if __has_builtin(__builtin_addcll)
    unsigned long long c = *pcarry;
    x = __builtin_addcll(x, y, c, &c);
    *pcarry = c & 1;
    return x;
#else
    bool c = *pcarry;
    /* This is clang's internal expansion of __builtin_addc. */
    c = uadd64_overflow(x, c, &x);
    c |= uadd64_overflow(x, y, &x);
    *pcarry = c;
    return x;
#endif
}

/**
 * usub64_borrow - subtraction with borrow-in and borrow-out
 * @x: Minuend
 * @y: Subtrahend
 * @pborrow: in-out borrow value
 *
 * Computes @x - @y - *@pborrow, placing the borrow-out back
 * into *@pborrow and returning the 64-bit difference.
 */
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
{
#if __has_builtin(__builtin_subcll)
    unsigned long long b = *pborrow;
    x = __builtin_subcll(x, y, b, &b);
    *pborrow = b & 1;
    return x;
#else
    bool b = *pborrow;
    b = usub64_overflow(x, b, &x);
    b |= usub64_overflow(x, y, &x);
    *pborrow = b;
    return x;
#endif
}

/* Host type specific sizes of these routines. */

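For context, a minimal usage sketch (not part of the patch): add128_example is a hypothetical two-limb (128-bit) addition that chains the carry with uadd64_carry above; usub64_borrow composes the same way for subtraction.

static inline void add128_example(uint64_t al, uint64_t ah,
                                  uint64_t bl, uint64_t bh,
                                  uint64_t *rl, uint64_t *rh)
{
    bool carry = false;
    /* Add the low limbs first; the carry out feeds the high limbs. */
    *rl = uadd64_carry(al, bl, &carry);
    *rh = uadd64_carry(ah, bh, &carry);
}
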
@@ -27,8 +27,14 @@ static inline void restore_flush_mode(CPUMIPSState *env)

static inline void restore_snan_bit_mode(CPUMIPSState *env)
{
    set_snan_bit_is_one((env->active_fpu.fcr31 & (1 << FCR31_NAN2008)) == 0,
                        &env->active_fpu.fp_status);
    bool nan2008 = env->active_fpu.fcr31 & (1 << FCR31_NAN2008);

    /*
     * With nan2008, SNaNs are silenced in the usual way.
     * Before that, SNaNs are not silenced; default nans are produced.
     */
    set_snan_bit_is_one(!nan2008, &env->active_fpu.fp_status);
    set_default_nan_mode(!nan2008, &env->active_fpu.fp_status);
}

static inline void restore_fp_status(CPUMIPSState *env)

@@ -14,6 +14,7 @@
#include <math.h>
#include <fenv.h>
#include "qemu/timer.h"
#include "qemu/int128.h"
#include "fpu/softfloat.h"

/* amortize the computation of random inputs */

@@ -50,8 +51,10 @@ static const char * const op_names[] = {
enum precision {
    PREC_SINGLE,
    PREC_DOUBLE,
    PREC_QUAD,
    PREC_FLOAT32,
    PREC_FLOAT64,
    PREC_FLOAT128,
    PREC_MAX_NR,
};

@@ -89,6 +92,7 @@ union fp {
    double d;
    float32 f32;
    float64 f64;
    float128 f128;
    uint64_t u64;
};

@@ -113,6 +117,10 @@ struct op_desc {
static uint64_t random_ops[MAX_OPERANDS] = {
    SEED_A, SEED_B, SEED_C,
};

static float128 random_quad_ops[MAX_OPERANDS] = {
    {SEED_A, SEED_B}, {SEED_B, SEED_C}, {SEED_C, SEED_A},
};
static float_status soft_status;
static enum precision precision;
static enum op operation;

@@ -141,25 +149,45 @@ static void update_random_ops(int n_ops, enum precision prec)
    int i;

    for (i = 0; i < n_ops; i++) {
        uint64_t r = random_ops[i];

        switch (prec) {
        case PREC_SINGLE:
        case PREC_FLOAT32:
        {
            uint64_t r = random_ops[i];
            do {
                r = xorshift64star(r);
            } while (!float32_is_normal(r));
            random_ops[i] = r;
            break;
        }
        case PREC_DOUBLE:
        case PREC_FLOAT64:
        {
            uint64_t r = random_ops[i];
            do {
                r = xorshift64star(r);
            } while (!float64_is_normal(r));
            random_ops[i] = r;
            break;
        }
        case PREC_QUAD:
        case PREC_FLOAT128:
        {
            float128 r = random_quad_ops[i];
            uint64_t hi = r.high;
            uint64_t lo = r.low;
            do {
                hi = xorshift64star(hi);
                lo = xorshift64star(lo);
                r = make_float128(hi, lo);
            } while (!float128_is_normal(r));
            random_quad_ops[i] = r;
            break;
        }
        default:
            g_assert_not_reached();
        }
        random_ops[i] = r;
    }
}

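For reference, xorshift64star used above is fp-bench's pseudo-random step; the sketch below shows the standard xorshift64* algorithm, and the shift/multiplier constants are an assumption here rather than taken from this diff.

static inline uint64_t xorshift64star_sketch(uint64_t x)
{
    /* Standard xorshift64* step; constants assumed, not from this patch. */
    x ^= x >> 12;
    x ^= x << 25;
    x ^= x >> 27;
    return x * 0x2545F4914F6CDD1DULL;
}
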
@@ -184,6 +212,13 @@ static void fill_random(union fp *ops, int n_ops, enum precision prec,
                ops[i].f64 = float64_chs(ops[i].f64);
            }
            break;
        case PREC_QUAD:
        case PREC_FLOAT128:
            ops[i].f128 = random_quad_ops[i];
            if (no_neg && float128_is_neg(ops[i].f128)) {
                ops[i].f128 = float128_chs(ops[i].f128);
            }
            break;
        default:
            g_assert_not_reached();
        }

@@ -345,6 +380,41 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
            }
        }
        break;
    case PREC_FLOAT128:
        fill_random(ops, n_ops, prec, no_neg);
        t0 = get_clock();
        for (i = 0; i < OPS_PER_ITER; i++) {
            float128 a = ops[0].f128;
            float128 b = ops[1].f128;
            float128 c = ops[2].f128;

            switch (op) {
            case OP_ADD:
                res.f128 = float128_add(a, b, &soft_status);
                break;
            case OP_SUB:
                res.f128 = float128_sub(a, b, &soft_status);
                break;
            case OP_MUL:
                res.f128 = float128_mul(a, b, &soft_status);
                break;
            case OP_DIV:
                res.f128 = float128_div(a, b, &soft_status);
                break;
            case OP_FMA:
                res.f128 = float128_muladd(a, b, c, 0, &soft_status);
                break;
            case OP_SQRT:
                res.f128 = float128_sqrt(a, &soft_status);
                break;
            case OP_CMP:
                res.u64 = float128_compare_quiet(a, b, &soft_status);
                break;
            default:
                g_assert_not_reached();
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

@@ -369,7 +439,8 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
    GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)
    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops)

GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)

@@ -383,7 +454,8 @@ GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
    GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)
    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n)

GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
#undef GEN_BENCH_ALL_TYPES_NO_NEG

@@ -397,6 +469,7 @@ GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
        [PREC_DOUBLE] = bench_ ## opname ## _double, \
        [PREC_FLOAT32] = bench_ ## opname ## _float32, \
        [PREC_FLOAT64] = bench_ ## opname ## _float64, \
        [PREC_FLOAT128] = bench_ ## opname ## _float128, \
    }

static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {

@@ -445,7 +518,7 @@ static void usage_complete(int argc, char *argv[])
    fprintf(stderr, " -h = show this help message.\n");
    fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
            op_list, op_names[0]);
    fprintf(stderr, " -p = floating point precision (single, double). "
    fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). "
            "Default: single\n");
    fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
            "Default: even\n");

@@ -565,6 +638,8 @@ static void parse_args(int argc, char *argv[])
            precision = PREC_SINGLE;
        } else if (!strcmp(optarg, "double")) {
            precision = PREC_DOUBLE;
        } else if (!strcmp(optarg, "quad")) {
            precision = PREC_QUAD;
        } else {
            fprintf(stderr, "Unsupported precision '%s'\n", optarg);
            exit(EXIT_FAILURE);

@@ -608,6 +683,9 @@ static void parse_args(int argc, char *argv[])
    case PREC_DOUBLE:
        precision = PREC_FLOAT64;
        break;
    case PREC_QUAD:
        precision = PREC_FLOAT128;
        break;
    default:
        g_assert_not_reached();
    }

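For context, a minimal sketch (not part of the patch) of calling the new float128_muladd path that the quad benchmark above exercises; the binary128 constants are hand-encoded here and a zero-initialized float_status (nearest-even rounding) is assumed.

static float128 float128_muladd_example(void)
{
    float_status st = { 0 };  /* nearest-even rounding assumed as the default */
    float128 one   = make_float128(0x3fff000000000000ULL, 0); /* 1.0 */
    float128 two   = make_float128(0x4000000000000000ULL, 0); /* 2.0 */
    float128 three = make_float128(0x4000800000000000ULL, 0); /* 3.0 */
    /* Computes 1.0 * 2.0 + 3.0 = 5.0 in binary128. */
    return float128_muladd(one, two, three, 0, &st);
}
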
@@ -717,7 +717,7 @@ static void do_testfloat(int op, int rmode, bool exact)
        test_abz_f128(true_abz_f128M, subj_abz_f128M);
        break;
    case F128_MULADD:
        not_implemented();
        test_abcz_f128(slow_f128M_mulAdd, qemu_f128M_mulAdd);
        break;
    case F128_SQRT:
        test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt);

@@ -574,6 +574,18 @@ WRAP_MULADD(qemu_f32_mulAdd, float32_muladd, float32)
WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)
#undef WRAP_MULADD

static void qemu_f128M_mulAdd(const float128_t *ap, const float128_t *bp,
                              const float128_t *cp, float128_t *res)
{
    float128 a, b, c, ret;

    a = soft_to_qemu128(*ap);
    b = soft_to_qemu128(*bp);
    c = soft_to_qemu128(*cp);
    ret = float128_muladd(a, b, c, 0, &qsf);
    *res = qemu_to_soft128(ret);
}

#define WRAP_CMP16(name, func, retcond) \
static bool name(float16_t a, float16_t b) \
{ \