target/arm: Convert ADDV, *ADDLV, *MAXV, *MINV to decodetree

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240912024114.1097832-11-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2024-09-11 19:40:55 -07:00 committed by Peter Maydell
parent d944e04961
commit cc7ece7216
2 changed files with 61 additions and 91 deletions

View File

@ -59,6 +59,8 @@
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1
@qrrr_s . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=2
@ -1154,3 +1156,13 @@ TRN1 0.00 1110 .. 0 ..... 0 010 10 ..... ..... @qrrr_e
TRN2 0.00 1110 .. 0 ..... 0 110 10 ..... ..... @qrrr_e
ZIP1 0.00 1110 .. 0 ..... 0 011 10 ..... ..... @qrrr_e
ZIP2 0.00 1110 .. 0 ..... 0 111 10 ..... ..... @qrrr_e
# Advanced SIMD Across Lanes
ADDV 0.00 1110 .. 11000 11011 10 ..... ..... @qrr_e
SADDLV 0.00 1110 .. 11000 00011 10 ..... ..... @qrr_e
UADDLV 0.10 1110 .. 11000 00011 10 ..... ..... @qrr_e
SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e
UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e

View File

@ -6794,6 +6794,47 @@ TRANS(FNMADD, do_fmadd, a, true, true)
TRANS(FMSUB, do_fmadd, a, false, true)
TRANS(FNMSUB, do_fmadd, a, true, false)
/*
* Advanced SIMD Across Lanes
*/
/*
 * Fold every element of vector register Rn into a single value using @fn
 * and store the result in Rd with write_fp_dreg().
 *
 * @widen:    when true the result is kept at twice the element size
 *            (esz + 1) rather than at esz.
 * @src_sign: MO_SIGN to read the source elements as signed, or 0.
 * @fn:       64-bit two-operand op that combines the accumulator with
 *            the next element.
 *
 * Returns false when the encoding is rejected (fewer than 4 elements);
 * returns true otherwise, including when fp_access_check() fails.
 */
static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
                             MemOp src_sign, NeonGenTwo64OpFn *fn)
{
    const int vec_bytes = a->q ? 16 : 8;
    const int nr_lanes = vec_bytes >> a->esz;
    const MemOp lane_mop = a->esz | src_sign;
    TCGv_i64 acc, lane;
    int idx;

    /* A minimum of 4 elements: this rejects MO_64, and MO_32 without Q. */
    if (nr_lanes < 4) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    acc = tcg_temp_new_i64();
    lane = tcg_temp_new_i64();

    /* Seed the accumulator with element 0, then fold in the rest in order. */
    read_vec_element(s, acc, a->rn, 0, lane_mop);
    for (idx = 1; idx < nr_lanes; ++idx) {
        read_vec_element(s, lane, a->rn, idx, lane_mop);
        fn(acc, acc, lane);
    }

    /* Narrow the 64-bit intermediate to the output width before writing. */
    tcg_gen_ext_i64(acc, acc, a->esz + widen);
    write_fp_dreg(s, a->rd, acc);
    return true;
}
/*
 * One TRANS() line per integer across-lanes instruction.  The arguments
 * after 'a' line up with do_int_reduction()'s widen, src_sign and fn
 * parameters: only the *ADDLV forms widen, and the signed forms pass
 * MO_SIGN.
 * NOTE(review): TRANS() is defined outside this view; presumably it emits
 * the trans_<NAME>() hook for the matching decodetree pattern -- confirm.
 */
TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
* Note that it is the caller's responsibility to ensure that the
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@ -9092,27 +9133,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
int opcode = extract32(insn, 12, 5);
bool is_q = extract32(insn, 30, 1);
bool is_u = extract32(insn, 29, 1);
bool is_fp = false;
bool is_min = false;
int elements;
int i;
TCGv_i64 tcg_res, tcg_elt;
switch (opcode) {
case 0x1b: /* ADDV */
if (is_u) {
unallocated_encoding(s);
return;
}
/* fall through */
case 0x3: /* SADDLV, UADDLV */
case 0xa: /* SMAXV, UMAXV */
case 0x1a: /* SMINV, UMINV */
if (size == 3 || (size == 2 && !is_q)) {
unallocated_encoding(s);
return;
}
break;
case 0xc: /* FMAXNMV, FMINNMV */
case 0xf: /* FMAXV, FMINV */
/* Bit 1 of size field encodes min vs max and the actual size
@ -9121,7 +9145,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
* precision.
*/
is_min = extract32(size, 1, 1);
is_fp = true;
if (!is_u && dc_isar_feature(aa64_fp16, s)) {
size = 1;
} else if (!is_u || !is_q || extract32(size, 0, 1)) {
@ -9132,6 +9155,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
}
break;
default:
case 0x3: /* SADDLV, UADDLV */
case 0xa: /* SMAXV, UMAXV */
case 0x1a: /* SMINV, UMINV */
case 0x1b: /* ADDV */
unallocated_encoding(s);
return;
}
@ -9142,52 +9169,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
elements = (is_q ? 16 : 8) >> size;
tcg_res = tcg_temp_new_i64();
tcg_elt = tcg_temp_new_i64();
/* These instructions operate across all lanes of a vector
* to produce a single result. We can guarantee that a 64
* bit intermediate is sufficient:
* + for [US]ADDLV the maximum element size is 32 bits, and
* the result type is 64 bits
* + for FMAX*V, FMIN*V, ADDV the intermediate type is the
* same as the element size, which is 32 bits at most
* For the integer operations we can choose to work at 64
* or 32 bits and truncate at the end; for simplicity
* we use 64 bits always. The floating point
* ops do require 32 bit intermediates, though.
*/
if (!is_fp) {
read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
for (i = 1; i < elements; i++) {
read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
switch (opcode) {
case 0x03: /* SADDLV / UADDLV */
case 0x1b: /* ADDV */
tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
break;
case 0x0a: /* SMAXV / UMAXV */
if (is_u) {
tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
} else {
tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
}
break;
case 0x1a: /* SMINV / UMINV */
if (is_u) {
tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
} else {
tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
}
break;
default:
g_assert_not_reached();
}
}
} else {
{
/* Floating point vector reduction ops which work across 32
* bit (single) or 16 bit (half-precision) intermediates.
* Note that correct NaN propagation requires that we do these
@ -9195,34 +9177,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
*/
TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
int fpopcode = opcode | is_min << 4 | is_u << 5;
TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, size,
0, elements, fpst);
tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
TCGv_i32 tcg_res = do_reduction_op(s, fpopcode, rn, size,
0, elements, fpst);
write_fp_sreg(s, rd, tcg_res);
}
/* Now truncate the result to the width required for the final output */
if (opcode == 0x03) {
/* SADDLV, UADDLV: result is 2*esize */
size++;
}
switch (size) {
case 0:
tcg_gen_ext8u_i64(tcg_res, tcg_res);
break;
case 1:
tcg_gen_ext16u_i64(tcg_res, tcg_res);
break;
case 2:
tcg_gen_ext32u_i64(tcg_res, tcg_res);
break;
case 3:
break;
default:
g_assert_not_reached();
}
write_fp_dreg(s, rd, tcg_res);
}
/* AdvSIMD modified immediate