target/arm: Convert FMAXNMV, FMINNMV, FMAXV, FMINV to decodetree

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240912024114.1097832-12-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2024-09-11 19:40:56 -07:00 committed by Peter Maydell
parent cc7ece7216
commit 3d44e070a6
2 changed files with 67 additions and 123 deletions

View File

@ -54,11 +54,13 @@
@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3
@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
@rr_q1e2 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=2
@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
@qrr_h . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=1
@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
@ -1166,3 +1168,15 @@ SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e
UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e
FMAXNMV_h 0.00 1110 00 11000 01100 10 ..... ..... @qrr_h
FMAXNMV_s 0110 1110 00 11000 01100 10 ..... ..... @rr_q1e2
FMINNMV_h 0.00 1110 10 11000 01100 10 ..... ..... @qrr_h
FMINNMV_s 0110 1110 10 11000 01100 10 ..... ..... @rr_q1e2
FMAXV_h 0.00 1110 00 11000 01111 10 ..... ..... @qrr_h
FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2
FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h
FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2

View File

@ -6835,6 +6835,59 @@ TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
/*
* do_fp_reduction helper
*
* This mirrors the Reduce() pseudocode in the ARM ARM. It is
* important for correct NaN propagation that we do these
* operations in exactly the order specified by the pseudocode.
*
* This is a recursive function.
*/
static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
int ebase, int ecount, TCGv_ptr fpst,
NeonGenTwoSingleOpFn *fn)
{
if (ecount == 1) {
TCGv_i32 tcg_elem = tcg_temp_new_i32();
read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
return tcg_elem;
} else {
int half = ecount >> 1;
TCGv_i32 tcg_hi, tcg_lo, tcg_res;
tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
tcg_res = tcg_temp_new_i32();
fn(tcg_res, tcg_lo, tcg_hi, fpst);
return tcg_res;
}
}
static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
NeonGenTwoSingleOpFn *fn)
{
if (fp_access_check(s)) {
MemOp esz = a->esz;
int elts = (a->q ? 16 : 8) >> esz;
TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
write_fp_sreg(s, a->rd, res);
}
return true;
}
TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh)
TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh)
TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh)
TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh)
TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
* Note that it is the caller's responsibility to ensure that the
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@ -9061,128 +9114,6 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
}
}
/*
* do_reduction_op helper
*
* This mirrors the Reduce() pseudocode in the ARM ARM. It is
* important for correct NaN propagation that we do these
* operations in exactly the order specified by the pseudocode.
*
* This is a recursive function.
*/
static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
MemOp esz, int ebase, int ecount, TCGv_ptr fpst)
{
if (ecount == 1) {
TCGv_i32 tcg_elem = tcg_temp_new_i32();
read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
return tcg_elem;
} else {
int half = ecount >> 1;
TCGv_i32 tcg_hi, tcg_lo, tcg_res;
tcg_hi = do_reduction_op(s, fpopcode, rn, esz,
ebase + half, half, fpst);
tcg_lo = do_reduction_op(s, fpopcode, rn, esz,
ebase, half, fpst);
tcg_res = tcg_temp_new_i32();
switch (fpopcode) {
case 0x0c: /* fmaxnmv half-precision */
gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
break;
case 0x0f: /* fmaxv half-precision */
gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
break;
case 0x1c: /* fminnmv half-precision */
gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
break;
case 0x1f: /* fminv half-precision */
gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
break;
case 0x2c: /* fmaxnmv */
gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
break;
case 0x2f: /* fmaxv */
gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
break;
case 0x3c: /* fminnmv */
gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
break;
case 0x3f: /* fminv */
gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
break;
default:
g_assert_not_reached();
}
return tcg_res;
}
}
/* AdvSIMD across lanes
* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
* +---+---+---+-----------+------+-----------+--------+-----+------+------+
* | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
* +---+---+---+-----------+------+-----------+--------+-----+------+------+
*/
static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
{
int rd = extract32(insn, 0, 5);
int rn = extract32(insn, 5, 5);
int size = extract32(insn, 22, 2);
int opcode = extract32(insn, 12, 5);
bool is_q = extract32(insn, 30, 1);
bool is_u = extract32(insn, 29, 1);
bool is_min = false;
int elements;
switch (opcode) {
case 0xc: /* FMAXNMV, FMINNMV */
case 0xf: /* FMAXV, FMINV */
/* Bit 1 of size field encodes min vs max and the actual size
* depends on the encoding of the U bit. If not set (and FP16
* enabled) then we do half-precision float instead of single
* precision.
*/
is_min = extract32(size, 1, 1);
if (!is_u && dc_isar_feature(aa64_fp16, s)) {
size = 1;
} else if (!is_u || !is_q || extract32(size, 0, 1)) {
unallocated_encoding(s);
return;
} else {
size = 2;
}
break;
default:
case 0x3: /* SADDLV, UADDLV */
case 0xa: /* SMAXV, UMAXV */
case 0x1a: /* SMINV, UMINV */
case 0x1b: /* ADDV */
unallocated_encoding(s);
return;
}
if (!fp_access_check(s)) {
return;
}
elements = (is_q ? 16 : 8) >> size;
{
/* Floating point vector reduction ops which work across 32
* bit (single) or 16 bit (half-precision) intermediates.
* Note that correct NaN propagation requires that we do these
* operations in exactly the order specified by the pseudocode.
*/
TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
int fpopcode = opcode | is_min << 4 | is_u << 5;
TCGv_i32 tcg_res = do_reduction_op(s, fpopcode, rn, size,
0, elements, fpst);
write_fp_sreg(s, rd, tcg_res);
}
}
/* AdvSIMD modified immediate
* 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
* +---+---+----+---------------------+-----+-------+----+---+-------+------+
@ -11735,7 +11666,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
static const AArch64DecodeTable data_proc_simd[] = {
/* pattern , mask , fn */
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
{ 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
/* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
{ 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
{ 0x0f000400, 0x9f800400, disas_simd_shift_imm },