mirror of https://github.com/xemu-project/xemu.git
target/arm: Convert FMAXNMV, FMINNMV, FMAXV, FMINV to decodetree
Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20240912024114.1097832-12-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
cc7ece7216
commit
3d44e070a6
|
@ -54,11 +54,13 @@
|
|||
@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3
|
||||
|
||||
@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
|
||||
@rr_q1e2 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=2
|
||||
@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
|
||||
@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
|
||||
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
|
||||
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
|
||||
|
||||
@qrr_h . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=1
|
||||
@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
|
||||
|
||||
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
|
||||
|
@ -1166,3 +1168,15 @@ SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e
|
|||
UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
|
||||
SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
|
||||
UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e
|
||||
|
||||
FMAXNMV_h 0.00 1110 00 11000 01100 10 ..... ..... @qrr_h
|
||||
FMAXNMV_s 0110 1110 00 11000 01100 10 ..... ..... @rr_q1e2
|
||||
|
||||
FMINNMV_h 0.00 1110 10 11000 01100 10 ..... ..... @qrr_h
|
||||
FMINNMV_s 0110 1110 10 11000 01100 10 ..... ..... @rr_q1e2
|
||||
|
||||
FMAXV_h 0.00 1110 00 11000 01111 10 ..... ..... @qrr_h
|
||||
FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2
|
||||
|
||||
FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h
|
||||
FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2
|
||||
|
|
|
@ -6835,6 +6835,59 @@ TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
|
|||
TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
|
||||
TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
|
||||
|
||||
/*
|
||||
* do_fp_reduction helper
|
||||
*
|
||||
* This mirrors the Reduce() pseudocode in the ARM ARM. It is
|
||||
* important for correct NaN propagation that we do these
|
||||
* operations in exactly the order specified by the pseudocode.
|
||||
*
|
||||
* This is a recursive function.
|
||||
*/
|
||||
static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
|
||||
int ebase, int ecount, TCGv_ptr fpst,
|
||||
NeonGenTwoSingleOpFn *fn)
|
||||
{
|
||||
if (ecount == 1) {
|
||||
TCGv_i32 tcg_elem = tcg_temp_new_i32();
|
||||
read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
|
||||
return tcg_elem;
|
||||
} else {
|
||||
int half = ecount >> 1;
|
||||
TCGv_i32 tcg_hi, tcg_lo, tcg_res;
|
||||
|
||||
tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
|
||||
tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
|
||||
tcg_res = tcg_temp_new_i32();
|
||||
|
||||
fn(tcg_res, tcg_lo, tcg_hi, fpst);
|
||||
return tcg_res;
|
||||
}
|
||||
}
|
||||
|
||||
static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
|
||||
NeonGenTwoSingleOpFn *fn)
|
||||
{
|
||||
if (fp_access_check(s)) {
|
||||
MemOp esz = a->esz;
|
||||
int elts = (a->q ? 16 : 8) >> esz;
|
||||
TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
|
||||
TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
|
||||
write_fp_sreg(s, a->rd, res);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh)
|
||||
TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh)
|
||||
TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh)
|
||||
TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh)
|
||||
|
||||
TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
|
||||
TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
|
||||
TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
|
||||
TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
|
||||
|
||||
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
|
||||
* Note that it is the caller's responsibility to ensure that the
|
||||
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
|
||||
|
@ -9061,128 +9114,6 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* do_reduction_op helper
|
||||
*
|
||||
* This mirrors the Reduce() pseudocode in the ARM ARM. It is
|
||||
* important for correct NaN propagation that we do these
|
||||
* operations in exactly the order specified by the pseudocode.
|
||||
*
|
||||
* This is a recursive function.
|
||||
*/
|
||||
static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
|
||||
MemOp esz, int ebase, int ecount, TCGv_ptr fpst)
|
||||
{
|
||||
if (ecount == 1) {
|
||||
TCGv_i32 tcg_elem = tcg_temp_new_i32();
|
||||
read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
|
||||
return tcg_elem;
|
||||
} else {
|
||||
int half = ecount >> 1;
|
||||
TCGv_i32 tcg_hi, tcg_lo, tcg_res;
|
||||
|
||||
tcg_hi = do_reduction_op(s, fpopcode, rn, esz,
|
||||
ebase + half, half, fpst);
|
||||
tcg_lo = do_reduction_op(s, fpopcode, rn, esz,
|
||||
ebase, half, fpst);
|
||||
tcg_res = tcg_temp_new_i32();
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x0c: /* fmaxnmv half-precision */
|
||||
gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
|
||||
break;
|
||||
case 0x0f: /* fmaxv half-precision */
|
||||
gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
|
||||
break;
|
||||
case 0x1c: /* fminnmv half-precision */
|
||||
gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
|
||||
break;
|
||||
case 0x1f: /* fminv half-precision */
|
||||
gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
|
||||
break;
|
||||
case 0x2c: /* fmaxnmv */
|
||||
gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
|
||||
break;
|
||||
case 0x2f: /* fmaxv */
|
||||
gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
|
||||
break;
|
||||
case 0x3c: /* fminnmv */
|
||||
gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
|
||||
break;
|
||||
case 0x3f: /* fminv */
|
||||
gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
return tcg_res;
|
||||
}
|
||||
}
|
||||
|
||||
/* AdvSIMD across lanes
|
||||
* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
|
||||
* +---+---+---+-----------+------+-----------+--------+-----+------+------+
|
||||
* | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
|
||||
* +---+---+---+-----------+------+-----------+--------+-----+------+------+
|
||||
*/
|
||||
static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
|
||||
{
|
||||
int rd = extract32(insn, 0, 5);
|
||||
int rn = extract32(insn, 5, 5);
|
||||
int size = extract32(insn, 22, 2);
|
||||
int opcode = extract32(insn, 12, 5);
|
||||
bool is_q = extract32(insn, 30, 1);
|
||||
bool is_u = extract32(insn, 29, 1);
|
||||
bool is_min = false;
|
||||
int elements;
|
||||
|
||||
switch (opcode) {
|
||||
case 0xc: /* FMAXNMV, FMINNMV */
|
||||
case 0xf: /* FMAXV, FMINV */
|
||||
/* Bit 1 of size field encodes min vs max and the actual size
|
||||
* depends on the encoding of the U bit. If not set (and FP16
|
||||
* enabled) then we do half-precision float instead of single
|
||||
* precision.
|
||||
*/
|
||||
is_min = extract32(size, 1, 1);
|
||||
if (!is_u && dc_isar_feature(aa64_fp16, s)) {
|
||||
size = 1;
|
||||
} else if (!is_u || !is_q || extract32(size, 0, 1)) {
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
} else {
|
||||
size = 2;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
case 0x3: /* SADDLV, UADDLV */
|
||||
case 0xa: /* SMAXV, UMAXV */
|
||||
case 0x1a: /* SMINV, UMINV */
|
||||
case 0x1b: /* ADDV */
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!fp_access_check(s)) {
|
||||
return;
|
||||
}
|
||||
|
||||
elements = (is_q ? 16 : 8) >> size;
|
||||
|
||||
{
|
||||
/* Floating point vector reduction ops which work across 32
|
||||
* bit (single) or 16 bit (half-precision) intermediates.
|
||||
* Note that correct NaN propagation requires that we do these
|
||||
* operations in exactly the order specified by the pseudocode.
|
||||
*/
|
||||
TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
|
||||
int fpopcode = opcode | is_min << 4 | is_u << 5;
|
||||
TCGv_i32 tcg_res = do_reduction_op(s, fpopcode, rn, size,
|
||||
0, elements, fpst);
|
||||
write_fp_sreg(s, rd, tcg_res);
|
||||
}
|
||||
}
|
||||
|
||||
/* AdvSIMD modified immediate
|
||||
* 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
|
||||
* +---+---+----+---------------------+-----+-------+----+---+-------+------+
|
||||
|
@ -11735,7 +11666,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
|
|||
static const AArch64DecodeTable data_proc_simd[] = {
|
||||
/* pattern , mask , fn */
|
||||
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
|
||||
{ 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
|
||||
/* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
|
||||
{ 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
|
||||
{ 0x0f000400, 0x9f800400, disas_simd_shift_imm },
|
||||
|
|
Loading…
Reference in New Issue