target/arm: Convert SMULL, UMULL, SMLAL, UMLAL, SMLSL, UMLSL to decodetree

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20240709000610.382391-2-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2024-07-08 17:06:05 -07:00 committed by Peter Maydell
parent 29f0bef71a
commit 97b06ab705
2 changed files with 156 additions and 50 deletions

View File

@ -962,6 +962,13 @@ FCADD_270 0.10 1110 ..0 ..... 11110 1 ..... ..... @qrrr_e
FCMLA_v 0 q:1 10 1110 esz:2 0 rm:5 110 rot:2 1 rn:5 rd:5
SMULL_v 0.00 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e
UMULL_v 0.10 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e
SMLAL_v 0.00 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e
UMLAL_v 0.10 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e
SMLSL_v 0.00 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e
UMLSL_v 0.10 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e
### Advanced SIMD scalar x indexed element
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
@ -1047,6 +1054,21 @@ FCMLA_vi 0 0 10 1111 01 idx:1 rm:5 0 rot:2 1 0 0 rn:5 rd:5 esz=1 q=0
FCMLA_vi 0 1 10 1111 01 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl q=1
FCMLA_vi 0 1 10 1111 10 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2 q=1
SMULL_vi 0.00 1111 01 .. .... 1010 . 0 ..... ..... @qrrx_h
SMULL_vi 0.00 1111 10 . ..... 1010 . 0 ..... ..... @qrrx_s
UMULL_vi 0.10 1111 01 .. .... 1010 . 0 ..... ..... @qrrx_h
UMULL_vi 0.10 1111 10 . ..... 1010 . 0 ..... ..... @qrrx_s
SMLAL_vi 0.00 1111 01 .. .... 0010 . 0 ..... ..... @qrrx_h
SMLAL_vi 0.00 1111 10 . ..... 0010 . 0 ..... ..... @qrrx_s
UMLAL_vi 0.10 1111 01 .. .... 0010 . 0 ..... ..... @qrrx_h
UMLAL_vi 0.10 1111 10 . ..... 0010 . 0 ..... ..... @qrrx_s
SMLSL_vi 0.00 1111 01 .. .... 0110 . 0 ..... ..... @qrrx_h
SMLSL_vi 0.00 1111 10 . ..... 0110 . 0 ..... ..... @qrrx_s
UMLSL_vi 0.10 1111 01 .. .... 0110 . 0 ..... ..... @qrrx_h
UMLSL_vi 0.10 1111 10 . ..... 0110 . 0 ..... ..... @qrrx_s
# Floating-point conditional select
FCSEL 0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd

View File

@ -5664,6 +5664,121 @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
return true;
}
/*
 * Widening vector x vector/indexed.
 *
 * The narrow inputs are taken from either the top or the bottom half
 * of a 128-bit vector.  Each widened result is twice the input width
 * and may optionally be accumulated with the 128-bit destination.
 * The expansion is done inline, because the number of elements is
 * limited and because the related SVE and SME operations on larger
 * vectors use even/odd elements instead of top/bottom half.
 *
 * @memop: input element size, optionally combined with MO_SIGN
 * @top:   non-zero to read the top half of the inputs, zero for bottom
 * @idx:   if >= 0, operand 2 is element @idx of @rm, read once up front;
 *         otherwise operand 2 is the element of @rm matching operand 1
 * @fn:    invoked as fn(op0, op1, op2); op0 is then stored to @rd
 * @acc:   if true, op0 is loaded from @rd before @fn is invoked
 *
 * Returns false when the element size is MO_64 or larger.  Otherwise
 * returns true, including the case where fp_access_check() fails and
 * no element operations are emitted.
 */
static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
                            int rd, int rn, int rm, int idx,
                            NeonGenTwo64OpFn *fn, bool acc)
{
    TCGv_i64 dst = tcg_temp_new_i64();
    TCGv_i64 lhs = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();
    const MemOp esz = memop & MO_SIZE;
    const int nelt = 8 >> esz;          /* input elements per 64-bit half */
    const bool indexed = idx >= 0;
    int flip, src_base;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    /* An indexed operand 2 is the same for every element: read it once. */
    if (indexed) {
        read_vec_element(s, rhs, rm, idx, memop);
    }

    /*
     * The destination may overlap the inputs.  Walk forward when the
     * inputs are in the top half and backward when they are in the
     * bottom half, so that no store to the destination happens before
     * the overlapping inputs have been consumed.
     * XOR-ing the forward counter with 'flip' yields the reversed index.
     */
    if (top) {
        flip = 0;
        src_base = nelt;
    } else {
        flip = nelt - 1;
        src_base = 0;
    }

    for (int i = 0; i < nelt; i++) {
        int dst_elt = i ^ flip;
        int src_elt = dst_elt + src_base;

        read_vec_element(s, lhs, rn, src_elt, memop);
        if (!indexed) {
            read_vec_element(s, rhs, rm, src_elt, memop);
        }
        if (acc) {
            /* The accumulator is read at the widened element size. */
            read_vec_element(s, dst, rd, dst_elt, memop + 1);
        }
        fn(dst, lhs, rhs);
        write_vec_element(s, dst, rd, dst_elt, esz + 1);
    }

    clear_vec_high(s, 1, rd);
    return true;
}
/* Multiply-accumulate: d = d + n * m, using a scratch temporary. */
static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 product = tcg_temp_new_i64();

    tcg_gen_mul_i64(product, n, m);
    tcg_gen_add_i64(d, d, product);
}
/* Multiply-subtract: d = d - n * m, using a scratch temporary. */
static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 product = tcg_temp_new_i64();

    tcg_gen_mul_i64(product, n, m);
    tcg_gen_sub_i64(d, d, product);
}
/*
 * Instantiations of do_3op_widening.  In every case a->q is passed as
 * the 'top' argument.  The signed forms OR MO_SIGN into the memop used
 * to read the inputs; the unsigned forms pass a->esz unchanged.
 *
 * Vector x vector forms: idx is -1, so operand 2 is read per element
 * from rm.  The plain multiplies pass acc=false; the multiply-add and
 * multiply-subtract forms pass acc=true so that rd is loaded first.
 */
TRANS(SMULL_v, do_3op_widening,
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
tcg_gen_mul_i64, false)
TRANS(UMULL_v, do_3op_widening,
a->esz, a->q, a->rd, a->rn, a->rm, -1,
tcg_gen_mul_i64, false)
TRANS(SMLAL_v, do_3op_widening,
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
gen_muladd_i64, true)
TRANS(UMLAL_v, do_3op_widening,
a->esz, a->q, a->rd, a->rn, a->rm, -1,
gen_muladd_i64, true)
TRANS(SMLSL_v, do_3op_widening,
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
gen_mulsub_i64, true)
TRANS(UMLSL_v, do_3op_widening,
a->esz, a->q, a->rd, a->rn, a->rm, -1,
gen_mulsub_i64, true)
/*
 * Vector x indexed element forms: identical to the above except that
 * a->idx is passed as idx, so operand 2 is a single element of rm.
 */
TRANS(SMULL_vi, do_3op_widening,
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
tcg_gen_mul_i64, false)
TRANS(UMULL_vi, do_3op_widening,
a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
tcg_gen_mul_i64, false)
TRANS(SMLAL_vi, do_3op_widening,
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
gen_muladd_i64, true)
TRANS(UMLAL_vi, do_3op_widening,
a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
gen_muladd_i64, true)
TRANS(SMLSL_vi, do_3op_widening,
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
gen_mulsub_i64, true)
TRANS(UMLSL_vi, do_3op_widening,
a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
gen_mulsub_i64, true)
/*
* Advanced SIMD scalar/vector x indexed element
*/
@ -10684,11 +10799,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
break;
}
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
break;
case 9: /* SQDMLAL, SQDMLAL2 */
case 11: /* SQDMLSL, SQDMLSL2 */
case 13: /* SQDMULL, SQDMULL2 */
@ -10697,6 +10807,9 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
tcg_passres, tcg_passres);
break;
default:
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
g_assert_not_reached();
}
@ -10763,23 +10876,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
}
}
break;
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
if (size == 0) {
if (is_u) {
gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
} else {
gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
}
} else {
if (is_u) {
gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
} else {
gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
}
}
break;
case 9: /* SQDMLAL, SQDMLAL2 */
case 11: /* SQDMLSL, SQDMLSL2 */
case 13: /* SQDMULL, SQDMULL2 */
@ -10789,6 +10885,9 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
tcg_passres, tcg_passres);
break;
default:
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
g_assert_not_reached();
}
@ -10981,9 +11080,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
/* 64 x 64 -> 128 */
if (size == 3) {
unallocated_encoding(s);
@ -10996,6 +11092,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
break;
default:
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
/* opcode 15 not allocated */
unallocated_encoding(s);
break;
@ -11979,17 +12078,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
int index;
switch (16 * u + opcode) {
case 0x02: /* SMLAL, SMLAL2 */
case 0x12: /* UMLAL, UMLAL2 */
case 0x06: /* SMLSL, SMLSL2 */
case 0x16: /* UMLSL, UMLSL2 */
case 0x0a: /* SMULL, SMULL2 */
case 0x1a: /* UMULL, UMULL2 */
if (is_scalar) {
unallocated_encoding(s);
return;
}
break;
case 0x03: /* SQDMLAL, SQDMLAL2 */
case 0x07: /* SQDMLSL, SQDMLSL2 */
case 0x0b: /* SQDMULL, SQDMULL2 */
@ -11997,22 +12085,28 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
default:
case 0x00: /* FMLAL */
case 0x01: /* FMLA */
case 0x02: /* SMLAL, SMLAL2 */
case 0x04: /* FMLSL */
case 0x05: /* FMLS */
case 0x06: /* SMLSL, SMLSL2 */
case 0x08: /* MUL */
case 0x09: /* FMUL */
case 0x0a: /* SMULL, SMULL2 */
case 0x0c: /* SQDMULH */
case 0x0d: /* SQRDMULH */
case 0x0e: /* SDOT */
case 0x0f: /* SUDOT / BFDOT / USDOT / BFMLAL */
case 0x10: /* MLA */
case 0x11: /* FCMLA #0 */
case 0x12: /* UMLAL, UMLAL2 */
case 0x13: /* FCMLA #90 */
case 0x14: /* MLS */
case 0x15: /* FCMLA #180 */
case 0x16: /* UMLSL, UMLSL2 */
case 0x17: /* FCMLA #270 */
case 0x18: /* FMLAL2 */
case 0x19: /* FMULX */
case 0x1a: /* UMULL, UMULL2 */
case 0x1c: /* FMLSL2 */
case 0x1d: /* SQRDMLAH */
case 0x1e: /* UDOT */
@ -12098,12 +12192,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
switch (opcode) {
case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
break;
case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
break;
case 0x7: /* SQDMLSL, SQDMLSL2 */
tcg_gen_neg_i64(tcg_passres, tcg_passres);
/* fall through */
@ -12113,6 +12201,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
tcg_passres);
break;
default:
case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
g_assert_not_reached();
}
}
@ -12170,14 +12260,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
switch (opcode) {
case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
tcg_passres);
break;
case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
tcg_passres);
break;
case 0x7: /* SQDMLSL, SQDMLSL2 */
gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
/* fall through */
@ -12187,6 +12269,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
tcg_passres);
break;
default:
case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
g_assert_not_reached();
}
}