mirror of https://github.com/xemu-project/xemu.git
target/arm: Convert ADDV, *ADDLV, *MAXV, *MINV to decodetree
Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20240912024114.1097832-11-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
d944e04961
commit
cc7ece7216
|
@ -59,6 +59,8 @@
|
|||
# Operand formats for the register forms below. The *_q1e3 variants
# hard-wire q=1 and esz=3; the @qrrr_b/_h/_s variants take q from the
# encoding and hard-wire esz to 0/1/2 respectively.
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
|
||||
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
|
||||
|
||||
# Two-register form: q is bit 30 and esz is bits [23:22], both taken
# from the encoding; rn is bits [9:5] and rd is bits [4:0].
@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
|
||||
|
||||
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
|
||||
@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1
|
||||
@qrrr_s . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=2
|
||||
|
@ -1154,3 +1156,13 @@ TRN1 0.00 1110 .. 0 ..... 0 010 10 ..... ..... @qrrr_e
|
|||
TRN2 0.00 1110 .. 0 ..... 0 110 10 ..... ..... @qrrr_e
|
||||
ZIP1 0.00 1110 .. 0 ..... 0 011 10 ..... ..... @qrrr_e
|
||||
ZIP2 0.00 1110 .. 0 ..... 0 111 10 ..... ..... @qrrr_e
|
||||
|
||||
# Advanced SIMD Across Lanes
#
# Integer reductions only. Bits [16:12] carry the opcode
# (0x1b ADDV, 0x03 *ADDLV, 0x0a *MAXV, 0x1a *MINV) and bit 29 is the
# only difference between each S*/U* pair. ADDV is listed with bit 29
# clear only. q and esz are extracted by @qrr_e; this file does not
# restrict which q/esz combinations are accepted.
|
||||
|
||||
ADDV 0.00 1110 .. 11000 11011 10 ..... ..... @qrr_e
|
||||
SADDLV 0.00 1110 .. 11000 00011 10 ..... ..... @qrr_e
|
||||
UADDLV 0.10 1110 .. 11000 00011 10 ..... ..... @qrr_e
|
||||
SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e
|
||||
UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
|
||||
SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
|
||||
UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e
|
||||
|
|
|
@ -6794,6 +6794,47 @@ TRANS(FNMADD, do_fmadd, a, true, true)
|
|||
TRANS(FMSUB, do_fmadd, a, false, true)
|
||||
TRANS(FNMSUB, do_fmadd, a, true, false)
|
||||
|
||||
/*
|
||||
* Advanced SIMD Across Lanes
|
||||
*/
|
||||
|
||||
static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
|
||||
MemOp src_sign, NeonGenTwo64OpFn *fn)
|
||||
{
|
||||
TCGv_i64 tcg_res, tcg_elt;
|
||||
MemOp src_mop = a->esz | src_sign;
|
||||
int elements = (a->q ? 16 : 8) >> a->esz;
|
||||
|
||||
/* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
|
||||
if (elements < 4) {
|
||||
return false;
|
||||
}
|
||||
if (!fp_access_check(s)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
tcg_res = tcg_temp_new_i64();
|
||||
tcg_elt = tcg_temp_new_i64();
|
||||
|
||||
read_vec_element(s, tcg_res, a->rn, 0, src_mop);
|
||||
for (int i = 1; i < elements; i++) {
|
||||
read_vec_element(s, tcg_elt, a->rn, i, src_mop);
|
||||
fn(tcg_res, tcg_res, tcg_elt);
|
||||
}
|
||||
|
||||
tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
|
||||
write_fp_dreg(s, a->rd, tcg_res);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Bind each across-lanes integer instruction to do_int_reduction().
 * The arguments after `a` are, in order, do_int_reduction's widen,
 * src_sign and fn parameters: only the *ADDLV forms widen, and the
 * S* forms read their lanes with MO_SIGN.
 * NOTE(review): TRANS is defined outside this excerpt; presumably it
 * generates the trans_<NAME> decodetree callback -- confirm.
 */
TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
|
||||
TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
|
||||
TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
|
||||
TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
|
||||
TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
|
||||
TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
|
||||
TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
|
||||
|
||||
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
|
||||
* Note that it is the caller's responsibility to ensure that the
|
||||
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
|
||||
|
@ -9092,27 +9133,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
|
|||
int opcode = extract32(insn, 12, 5);
|
||||
bool is_q = extract32(insn, 30, 1);
|
||||
bool is_u = extract32(insn, 29, 1);
|
||||
bool is_fp = false;
|
||||
bool is_min = false;
|
||||
int elements;
|
||||
int i;
|
||||
TCGv_i64 tcg_res, tcg_elt;
|
||||
|
||||
switch (opcode) {
|
||||
case 0x1b: /* ADDV */
|
||||
if (is_u) {
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
}
|
||||
/* fall through */
|
||||
case 0x3: /* SADDLV, UADDLV */
|
||||
case 0xa: /* SMAXV, UMAXV */
|
||||
case 0x1a: /* SMINV, UMINV */
|
||||
if (size == 3 || (size == 2 && !is_q)) {
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case 0xc: /* FMAXNMV, FMINNMV */
|
||||
case 0xf: /* FMAXV, FMINV */
|
||||
/* Bit 1 of size field encodes min vs max and the actual size
|
||||
|
@ -9121,7 +9145,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
|
|||
* precision.
|
||||
*/
|
||||
is_min = extract32(size, 1, 1);
|
||||
is_fp = true;
|
||||
if (!is_u && dc_isar_feature(aa64_fp16, s)) {
|
||||
size = 1;
|
||||
} else if (!is_u || !is_q || extract32(size, 0, 1)) {
|
||||
|
@ -9132,6 +9155,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
|
|||
}
|
||||
break;
|
||||
default:
|
||||
case 0x3: /* SADDLV, UADDLV */
|
||||
case 0xa: /* SMAXV, UMAXV */
|
||||
case 0x1a: /* SMINV, UMINV */
|
||||
case 0x1b: /* ADDV */
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
}
|
||||
|
@ -9142,52 +9169,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
|
|||
|
||||
elements = (is_q ? 16 : 8) >> size;
|
||||
|
||||
tcg_res = tcg_temp_new_i64();
|
||||
tcg_elt = tcg_temp_new_i64();
|
||||
|
||||
/* These instructions operate across all lanes of a vector
|
||||
* to produce a single result. We can guarantee that a 64
|
||||
* bit intermediate is sufficient:
|
||||
* + for [US]ADDLV the maximum element size is 32 bits, and
|
||||
* the result type is 64 bits
|
||||
* + for FMAX*V, FMIN*V, ADDV the intermediate type is the
|
||||
* same as the element size, which is 32 bits at most
|
||||
* For the integer operations we can choose to work at 64
|
||||
* or 32 bits and truncate at the end; for simplicity
|
||||
* we use 64 bits always. The floating point
|
||||
* ops do require 32 bit intermediates, though.
|
||||
*/
|
||||
if (!is_fp) {
|
||||
read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
|
||||
|
||||
for (i = 1; i < elements; i++) {
|
||||
read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
|
||||
|
||||
switch (opcode) {
|
||||
case 0x03: /* SADDLV / UADDLV */
|
||||
case 0x1b: /* ADDV */
|
||||
tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
|
||||
break;
|
||||
case 0x0a: /* SMAXV / UMAXV */
|
||||
if (is_u) {
|
||||
tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
|
||||
} else {
|
||||
tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
|
||||
}
|
||||
break;
|
||||
case 0x1a: /* SMINV / UMINV */
|
||||
if (is_u) {
|
||||
tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
|
||||
} else {
|
||||
tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
}
|
||||
} else {
|
||||
{
|
||||
/* Floating point vector reduction ops which work across 32
|
||||
* bit (single) or 16 bit (half-precision) intermediates.
|
||||
* Note that correct NaN propagation requires that we do these
|
||||
|
@ -9195,34 +9177,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
|
|||
*/
|
||||
TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
|
||||
int fpopcode = opcode | is_min << 4 | is_u << 5;
|
||||
TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, size,
|
||||
0, elements, fpst);
|
||||
tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
|
||||
TCGv_i32 tcg_res = do_reduction_op(s, fpopcode, rn, size,
|
||||
0, elements, fpst);
|
||||
write_fp_sreg(s, rd, tcg_res);
|
||||
}
|
||||
|
||||
/* Now truncate the result to the width required for the final output */
|
||||
if (opcode == 0x03) {
|
||||
/* SADDLV, UADDLV: result is 2*esize */
|
||||
size++;
|
||||
}
|
||||
|
||||
switch (size) {
|
||||
case 0:
|
||||
tcg_gen_ext8u_i64(tcg_res, tcg_res);
|
||||
break;
|
||||
case 1:
|
||||
tcg_gen_ext16u_i64(tcg_res, tcg_res);
|
||||
break;
|
||||
case 2:
|
||||
tcg_gen_ext32u_i64(tcg_res, tcg_res);
|
||||
break;
|
||||
case 3:
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
write_fp_dreg(s, rd, tcg_res);
|
||||
}
|
||||
|
||||
/* AdvSIMD modified immediate
|
||||
|
|
Loading…
Reference in New Issue