mirror of https://github.com/xemu-project/xemu.git
target/arm: Convert SMULL, UMULL, SMLAL, UMLAL, SMLSL, UMLSL to decodetree
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Message-id: 20240709000610.382391-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
29f0bef71a
commit
97b06ab705
target/arm/tcg
|
@ -962,6 +962,13 @@ FCADD_270 0.10 1110 ..0 ..... 11110 1 ..... ..... @qrrr_e
|
|||
|
||||
FCMLA_v 0 q:1 10 1110 esz:2 0 rm:5 110 rot:2 1 rn:5 rd:5
|
||||
|
||||
SMULL_v 0.00 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e
|
||||
UMULL_v 0.10 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e
|
||||
SMLAL_v 0.00 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e
|
||||
UMLAL_v 0.10 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e
|
||||
SMLSL_v 0.00 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e
|
||||
UMLSL_v 0.10 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e
|
||||
|
||||
### Advanced SIMD scalar x indexed element
|
||||
|
||||
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
|
||||
|
@ -1047,6 +1054,21 @@ FCMLA_vi 0 0 10 1111 01 idx:1 rm:5 0 rot:2 1 0 0 rn:5 rd:5 esz=1 q=0
|
|||
FCMLA_vi 0 1 10 1111 01 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl q=1
|
||||
FCMLA_vi 0 1 10 1111 10 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2 q=1
|
||||
|
||||
SMULL_vi 0.00 1111 01 .. .... 1010 . 0 ..... ..... @qrrx_h
|
||||
SMULL_vi 0.00 1111 10 . ..... 1010 . 0 ..... ..... @qrrx_s
|
||||
UMULL_vi 0.10 1111 01 .. .... 1010 . 0 ..... ..... @qrrx_h
|
||||
UMULL_vi 0.10 1111 10 . ..... 1010 . 0 ..... ..... @qrrx_s
|
||||
|
||||
SMLAL_vi 0.00 1111 01 .. .... 0010 . 0 ..... ..... @qrrx_h
|
||||
SMLAL_vi 0.00 1111 10 . ..... 0010 . 0 ..... ..... @qrrx_s
|
||||
UMLAL_vi 0.10 1111 01 .. .... 0010 . 0 ..... ..... @qrrx_h
|
||||
UMLAL_vi 0.10 1111 10 . ..... 0010 . 0 ..... ..... @qrrx_s
|
||||
|
||||
SMLSL_vi 0.00 1111 01 .. .... 0110 . 0 ..... ..... @qrrx_h
|
||||
SMLSL_vi 0.00 1111 10 . ..... 0110 . 0 ..... ..... @qrrx_s
|
||||
UMLSL_vi 0.10 1111 01 .. .... 0110 . 0 ..... ..... @qrrx_h
|
||||
UMLSL_vi 0.10 1111 10 . ..... 0110 . 0 ..... ..... @qrrx_s
|
||||
|
||||
# Floating-point conditional select
|
||||
|
||||
FCSEL 0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd
|
||||
|
|
|
@ -5664,6 +5664,121 @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Widening vector x vector/indexed.
|
||||
*
|
||||
* These read from the top or bottom half of a 128-bit vector.
|
||||
* After widening, optionally accumulate with a 128-bit vector.
|
||||
* Implement these inline, as the number of elements is limited
|
||||
* and the related SVE and SME operations on larger vectors use
|
||||
* even/odd elements instead of top/bottom half.
|
||||
*
|
||||
* If idx >= 0, operand 2 is indexed, otherwise vector.
|
||||
* If acc, operand 0 is loaded with rd.
|
||||
*/
|
||||
|
||||
/* For low half, iterating up. */
|
||||
static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
|
||||
int rd, int rn, int rm, int idx,
|
||||
NeonGenTwo64OpFn *fn, bool acc)
|
||||
{
|
||||
TCGv_i64 tcg_op0 = tcg_temp_new_i64();
|
||||
TCGv_i64 tcg_op1 = tcg_temp_new_i64();
|
||||
TCGv_i64 tcg_op2 = tcg_temp_new_i64();
|
||||
MemOp esz = memop & MO_SIZE;
|
||||
int half = 8 >> esz;
|
||||
int top_swap, top_half;
|
||||
|
||||
/* There are no 64x64->128 bit operations. */
|
||||
if (esz >= MO_64) {
|
||||
return false;
|
||||
}
|
||||
if (!fp_access_check(s)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (idx >= 0) {
|
||||
read_vec_element(s, tcg_op2, rm, idx, memop);
|
||||
}
|
||||
|
||||
/*
|
||||
* For top half inputs, iterate forward; backward for bottom half.
|
||||
* This means the store to the destination will not occur until
|
||||
* overlapping input inputs are consumed.
|
||||
* Use top_swap to conditionally invert the forward iteration index.
|
||||
*/
|
||||
top_swap = top ? 0 : half - 1;
|
||||
top_half = top ? half : 0;
|
||||
|
||||
for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
|
||||
int elt = elt_fwd ^ top_swap;
|
||||
|
||||
read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
|
||||
if (idx < 0) {
|
||||
read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
|
||||
}
|
||||
if (acc) {
|
||||
read_vec_element(s, tcg_op0, rd, elt, memop + 1);
|
||||
}
|
||||
fn(tcg_op0, tcg_op1, tcg_op2);
|
||||
write_vec_element(s, tcg_op0, rd, elt, esz + 1);
|
||||
}
|
||||
clear_vec_high(s, 1, rd);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Emit d = d + n * m on 64-bit values.
 * The product goes through a fresh temporary before being added to d.
 */
static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 product = tcg_temp_new_i64();

    tcg_gen_mul_i64(product, n, m);
    tcg_gen_add_i64(d, d, product);
}
|
||||
|
||||
/*
 * Emit d = d - n * m on 64-bit values.
 * The product goes through a fresh temporary before being subtracted
 * from d.
 */
static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 product = tcg_temp_new_i64();

    tcg_gen_mul_i64(product, n, m);
    tcg_gen_sub_i64(d, d, product);
}
|
||||
|
||||
TRANS(SMULL_v, do_3op_widening,
|
||||
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
|
||||
tcg_gen_mul_i64, false)
|
||||
TRANS(UMULL_v, do_3op_widening,
|
||||
a->esz, a->q, a->rd, a->rn, a->rm, -1,
|
||||
tcg_gen_mul_i64, false)
|
||||
TRANS(SMLAL_v, do_3op_widening,
|
||||
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
|
||||
gen_muladd_i64, true)
|
||||
TRANS(UMLAL_v, do_3op_widening,
|
||||
a->esz, a->q, a->rd, a->rn, a->rm, -1,
|
||||
gen_muladd_i64, true)
|
||||
TRANS(SMLSL_v, do_3op_widening,
|
||||
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
|
||||
gen_mulsub_i64, true)
|
||||
TRANS(UMLSL_v, do_3op_widening,
|
||||
a->esz, a->q, a->rd, a->rn, a->rm, -1,
|
||||
gen_mulsub_i64, true)
|
||||
|
||||
TRANS(SMULL_vi, do_3op_widening,
|
||||
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
|
||||
tcg_gen_mul_i64, false)
|
||||
TRANS(UMULL_vi, do_3op_widening,
|
||||
a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
|
||||
tcg_gen_mul_i64, false)
|
||||
TRANS(SMLAL_vi, do_3op_widening,
|
||||
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
|
||||
gen_muladd_i64, true)
|
||||
TRANS(UMLAL_vi, do_3op_widening,
|
||||
a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
|
||||
gen_muladd_i64, true)
|
||||
TRANS(SMLSL_vi, do_3op_widening,
|
||||
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
|
||||
gen_mulsub_i64, true)
|
||||
TRANS(UMLSL_vi, do_3op_widening,
|
||||
a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
|
||||
gen_mulsub_i64, true)
|
||||
|
||||
/*
|
||||
* Advanced SIMD scalar/vector x indexed element
|
||||
*/
|
||||
|
@ -10684,11 +10799,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
|
|||
tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
|
||||
break;
|
||||
}
|
||||
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
|
||||
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
|
||||
case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
|
||||
tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
|
||||
break;
|
||||
case 9: /* SQDMLAL, SQDMLAL2 */
|
||||
case 11: /* SQDMLSL, SQDMLSL2 */
|
||||
case 13: /* SQDMULL, SQDMULL2 */
|
||||
|
@ -10697,6 +10807,9 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
|
|||
tcg_passres, tcg_passres);
|
||||
break;
|
||||
default:
|
||||
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
|
||||
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
|
||||
case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
|
@ -10763,23 +10876,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
|
|||
}
|
||||
}
|
||||
break;
|
||||
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
|
||||
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
|
||||
case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
|
||||
if (size == 0) {
|
||||
if (is_u) {
|
||||
gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
|
||||
} else {
|
||||
gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
|
||||
}
|
||||
} else {
|
||||
if (is_u) {
|
||||
gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
|
||||
} else {
|
||||
gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 9: /* SQDMLAL, SQDMLAL2 */
|
||||
case 11: /* SQDMLSL, SQDMLSL2 */
|
||||
case 13: /* SQDMULL, SQDMULL2 */
|
||||
|
@ -10789,6 +10885,9 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
|
|||
tcg_passres, tcg_passres);
|
||||
break;
|
||||
default:
|
||||
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
|
||||
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
|
||||
case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
|
@ -10981,9 +11080,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
|
|||
case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
|
||||
case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
|
||||
case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
|
||||
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
|
||||
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
|
||||
case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
|
||||
/* 64 x 64 -> 128 */
|
||||
if (size == 3) {
|
||||
unallocated_encoding(s);
|
||||
|
@ -10996,6 +11092,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
|
|||
handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
|
||||
break;
|
||||
default:
|
||||
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
|
||||
case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
|
||||
case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
|
||||
/* opcode 15 not allocated */
|
||||
unallocated_encoding(s);
|
||||
break;
|
||||
|
@ -11979,17 +12078,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
|
|||
int index;
|
||||
|
||||
switch (16 * u + opcode) {
|
||||
case 0x02: /* SMLAL, SMLAL2 */
|
||||
case 0x12: /* UMLAL, UMLAL2 */
|
||||
case 0x06: /* SMLSL, SMLSL2 */
|
||||
case 0x16: /* UMLSL, UMLSL2 */
|
||||
case 0x0a: /* SMULL, SMULL2 */
|
||||
case 0x1a: /* UMULL, UMULL2 */
|
||||
if (is_scalar) {
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case 0x03: /* SQDMLAL, SQDMLAL2 */
|
||||
case 0x07: /* SQDMLSL, SQDMLSL2 */
|
||||
case 0x0b: /* SQDMULL, SQDMULL2 */
|
||||
|
@ -11997,22 +12085,28 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
|
|||
default:
|
||||
case 0x00: /* FMLAL */
|
||||
case 0x01: /* FMLA */
|
||||
case 0x02: /* SMLAL, SMLAL2 */
|
||||
case 0x04: /* FMLSL */
|
||||
case 0x05: /* FMLS */
|
||||
case 0x06: /* SMLSL, SMLSL2 */
|
||||
case 0x08: /* MUL */
|
||||
case 0x09: /* FMUL */
|
||||
case 0x0a: /* SMULL, SMULL2 */
|
||||
case 0x0c: /* SQDMULH */
|
||||
case 0x0d: /* SQRDMULH */
|
||||
case 0x0e: /* SDOT */
|
||||
case 0x0f: /* SUDOT / BFDOT / USDOT / BFMLAL */
|
||||
case 0x10: /* MLA */
|
||||
case 0x11: /* FCMLA #0 */
|
||||
case 0x12: /* UMLAL, UMLAL2 */
|
||||
case 0x13: /* FCMLA #90 */
|
||||
case 0x14: /* MLS */
|
||||
case 0x15: /* FCMLA #180 */
|
||||
case 0x16: /* UMLSL, UMLSL2 */
|
||||
case 0x17: /* FCMLA #270 */
|
||||
case 0x18: /* FMLAL2 */
|
||||
case 0x19: /* FMULX */
|
||||
case 0x1a: /* UMULL, UMULL2 */
|
||||
case 0x1c: /* FMLSL2 */
|
||||
case 0x1d: /* SQRDMLAH */
|
||||
case 0x1e: /* UDOT */
|
||||
|
@ -12098,12 +12192,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
|
|||
read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
|
||||
|
||||
switch (opcode) {
|
||||
case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
|
||||
tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
|
||||
break;
|
||||
case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
|
||||
tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
|
||||
break;
|
||||
case 0x7: /* SQDMLSL, SQDMLSL2 */
|
||||
tcg_gen_neg_i64(tcg_passres, tcg_passres);
|
||||
/* fall through */
|
||||
|
@ -12113,6 +12201,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
|
|||
tcg_passres);
|
||||
break;
|
||||
default:
|
||||
case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
|
||||
case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
@ -12170,14 +12260,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
|
|||
read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
|
||||
|
||||
switch (opcode) {
|
||||
case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
|
||||
gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
|
||||
tcg_passres);
|
||||
break;
|
||||
case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
|
||||
gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
|
||||
tcg_passres);
|
||||
break;
|
||||
case 0x7: /* SQDMLSL, SQDMLSL2 */
|
||||
gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
|
||||
/* fall through */
|
||||
|
@ -12187,6 +12269,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
|
|||
tcg_passres);
|
||||
break;
|
||||
default:
|
||||
case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
|
||||
case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue