From cbf4ad5498039105db7c41e16e116b5b6584a786 Mon Sep 17 00:00:00 2001 From: Paul Brook Date: Thu, 25 Aug 2022 23:58:15 +0200 Subject: [PATCH] target/i386: reimplement AVX comparison helpers AVX includes an additional set of comparison predicates, some of which our softfloat implementation does not expose as separate functions. Rewrite the helpers in terms of floatN_compare for future extensibility. Signed-off-by: Paul Brook Reviewed-by: Richard Henderson Message-Id: <20220424220204.2493824-24-paul@nowt.org> Signed-off-by: Paolo Bonzini --- target/i386/ops_sse.h | 97 ++++++++++++++++++++---------------- target/i386/ops_sse_header.h | 24 ++++----- target/i386/tcg/translate.c | 20 ++++---- 3 files changed, 75 insertions(+), 66 deletions(-) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 86ca74492e..7463ff1599 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -1005,57 +1005,66 @@ void glue(helper_addsubpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) } } -/* XXX: unordered */ -#define SSE_HELPER_CMP(name, F) \ - void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\ +#define SSE_HELPER_CMP_P(name, F, C) \ + void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, \ + Reg *d, Reg *s) \ { \ - d->ZMM_L(0) = F(32, d->ZMM_S(0), s->ZMM_S(0)); \ - d->ZMM_L(1) = F(32, d->ZMM_S(1), s->ZMM_S(1)); \ - d->ZMM_L(2) = F(32, d->ZMM_S(2), s->ZMM_S(2)); \ - d->ZMM_L(3) = F(32, d->ZMM_S(3), s->ZMM_S(3)); \ + Reg *v = d; \ + int i; \ + for (i = 0; i < 2 << SHIFT; i++) { \ + d->ZMM_L(i) = C(F(32, v->ZMM_S(i), s->ZMM_S(i))) ? -1 : 0; \ + } \ } \ \ - void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s) \ + void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, \ + Reg *d, Reg *s) \ { \ - d->ZMM_L(0) = F(32, d->ZMM_S(0), s->ZMM_S(0)); \ - } \ - \ - void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\ - { \ - d->ZMM_Q(0) = F(64, d->ZMM_D(0), s->ZMM_D(0)); \ - d->ZMM_Q(1) = F(64, d->ZMM_D(1), s->ZMM_D(1)); \ - } \ - \ - void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s) \ - { \ - d->ZMM_Q(0) = F(64, d->ZMM_D(0), s->ZMM_D(0)); \ + Reg *v = d; \ + int i; \ + for (i = 0; i < 1 << SHIFT; i++) { \ + d->ZMM_Q(i) = C(F(64, v->ZMM_D(i), s->ZMM_D(i))) ? -1 : 0; \ + } \ } -#define FPU_CMPEQ(size, a, b) \ - (float ## size ## _eq_quiet(a, b, &env->sse_status) ? -1 : 0) -#define FPU_CMPLT(size, a, b) \ - (float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0) -#define FPU_CMPLE(size, a, b) \ - (float ## size ## _le(a, b, &env->sse_status) ? -1 : 0) -#define FPU_CMPUNORD(size, a, b) \ - (float ## size ## _unordered_quiet(a, b, &env->sse_status) ? -1 : 0) -#define FPU_CMPNEQ(size, a, b) \ - (float ## size ## _eq_quiet(a, b, &env->sse_status) ? 0 : -1) -#define FPU_CMPNLT(size, a, b) \ - (float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1) -#define FPU_CMPNLE(size, a, b) \ - (float ## size ## _le(a, b, &env->sse_status) ? 0 : -1) -#define FPU_CMPORD(size, a, b) \ - (float ## size ## _unordered_quiet(a, b, &env->sse_status) ? 0 : -1) +#if SHIFT == 1 +#define SSE_HELPER_CMP(name, F, C) \ + SSE_HELPER_CMP_P(name, F, C) \ + void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s) \ + { \ + Reg *v = d; \ + d->ZMM_L(0) = C(F(32, v->ZMM_S(0), s->ZMM_S(0))) ? -1 : 0; \ + } \ + \ + void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s) \ + { \ + Reg *v = d; \ + d->ZMM_Q(0) = C(F(64, v->ZMM_D(0), s->ZMM_D(0))) ? -1 : 0; \ + } -SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) -SSE_HELPER_CMP(cmplt, FPU_CMPLT) -SSE_HELPER_CMP(cmple, FPU_CMPLE) -SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD) -SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ) -SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) -SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) -SSE_HELPER_CMP(cmpord, FPU_CMPORD) +#define FPU_EQ(x) (x == float_relation_equal) +#define FPU_LT(x) (x == float_relation_less) +#define FPU_LE(x) (x <= float_relation_equal) +#define FPU_UNORD(x) (x == float_relation_unordered) + +#define FPU_CMPQ(size, a, b) \ + float ## size ## _compare_quiet(a, b, &env->sse_status) +#define FPU_CMPS(size, a, b) \ + float ## size ## _compare(a, b, &env->sse_status) + +#else +#define SSE_HELPER_CMP(name, F, C) SSE_HELPER_CMP_P(name, F, C) +#endif + +SSE_HELPER_CMP(cmpeq, FPU_CMPQ, FPU_EQ) +SSE_HELPER_CMP(cmplt, FPU_CMPS, FPU_LT) +SSE_HELPER_CMP(cmple, FPU_CMPS, FPU_LE) +SSE_HELPER_CMP(cmpunord, FPU_CMPQ, FPU_UNORD) +SSE_HELPER_CMP(cmpneq, FPU_CMPQ, !FPU_EQ) +SSE_HELPER_CMP(cmpnlt, FPU_CMPS, !FPU_LT) +SSE_HELPER_CMP(cmpnle, FPU_CMPS, !FPU_LE) +SSE_HELPER_CMP(cmpord, FPU_CMPQ, !FPU_UNORD) + +#undef SSE_HELPER_CMP static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index fc697536a0..d99464afb0 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -201,20 +201,20 @@ DEF_HELPER_3(glue(hsubpd, SUFFIX), void, env, ZMMReg, ZMMReg) DEF_HELPER_3(glue(addsubps, SUFFIX), void, env, ZMMReg, ZMMReg) DEF_HELPER_3(glue(addsubpd, SUFFIX), void, env, ZMMReg, ZMMReg) -#define SSE_HELPER_CMP(name, F) \ - DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \ - DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ - DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg) \ +#define SSE_HELPER_CMP(name, F, C) \ + DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \ + DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ + DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg) \ DEF_HELPER_3(name ## sd, void, env, Reg, Reg) -SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) -SSE_HELPER_CMP(cmplt, FPU_CMPLT) -SSE_HELPER_CMP(cmple, FPU_CMPLE) -SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD) -SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ) -SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) -SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) -SSE_HELPER_CMP(cmpord, FPU_CMPORD) +SSE_HELPER_CMP(cmpeq, FPU_CMPQ, FPU_EQ) +SSE_HELPER_CMP(cmplt, FPU_CMPS, FPU_LT) +SSE_HELPER_CMP(cmple, FPU_CMPS, FPU_LE) +SSE_HELPER_CMP(cmpunord, FPU_CMPQ, FPU_UNORD) +SSE_HELPER_CMP(cmpneq, FPU_CMPQ, !FPU_EQ) +SSE_HELPER_CMP(cmpnlt, FPU_CMPS, !FPU_LT) +SSE_HELPER_CMP(cmpnle, FPU_CMPS, !FPU_LE) +SSE_HELPER_CMP(cmpord, FPU_CMPQ, !FPU_UNORD) DEF_HELPER_3(ucomiss, void, env, Reg, Reg) DEF_HELPER_3(comiss, void, env, Reg, Reg) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 99c84473f4..fc081e6ad6 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -3022,20 +3022,20 @@ static const SSEFunc_l_ep sse_op_table3bq[] = { }; #endif -#define SSE_FOP(x) { \ +#define SSE_CMP(x) { \ gen_helper_ ## x ## ps ## _xmm, gen_helper_ ## x ## pd ## _xmm, \ gen_helper_ ## x ## ss, gen_helper_ ## x ## sd} static const SSEFunc_0_epp sse_op_table4[8][4] = { - SSE_FOP(cmpeq), - SSE_FOP(cmplt), - SSE_FOP(cmple), - SSE_FOP(cmpunord), - SSE_FOP(cmpneq), - SSE_FOP(cmpnlt), - SSE_FOP(cmpnle), - SSE_FOP(cmpord), + SSE_CMP(cmpeq), + SSE_CMP(cmplt), + SSE_CMP(cmple), + SSE_CMP(cmpunord), + SSE_CMP(cmpneq), + SSE_CMP(cmpnlt), + SSE_CMP(cmpnle), + SSE_CMP(cmpord), }; -#undef SSE_FOP +#undef SSE_CMP static const SSEFunc_0_epp sse_op_table5[256] = { [0x0c] = gen_helper_pi2fw,