target/arm: Convert FRECPS, FRSQRTS to decodetree

These are the last instructions within handle_3same_float
and disas_simd_scalar_three_reg_same_fp16 so remove them.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-28-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2024-05-24 16:20:41 -07:00 committed by Peter Maydell
parent 43454734c4
commit 13db93bce5
2 changed files with 46 additions and 259 deletions

View File

@ -731,6 +731,12 @@ FACGT_s 0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd
FABD_s 0111 1110 110 ..... 00010 1 ..... ..... @rrr_h
FABD_s 0111 1110 1.1 ..... 11010 1 ..... ..... @rrr_sd
FRECPS_s 0101 1110 010 ..... 00111 1 ..... ..... @rrr_h
FRECPS_s 0101 1110 0.1 ..... 11111 1 ..... ..... @rrr_sd
FRSQRTS_s 0101 1110 110 ..... 00111 1 ..... ..... @rrr_h
FRSQRTS_s 0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd
### Advanced SIMD three same
FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
@ -784,6 +790,12 @@ FACGT_v 0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd
FABD_v 0.10 1110 110 ..... 00010 1 ..... ..... @qrrr_h
FABD_v 0.10 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd
FRECPS_v 0.00 1110 010 ..... 00111 1 ..... ..... @qrrr_h
FRECPS_v 0.00 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd
FRSQRTS_v 0.00 1110 110 ..... 00111 1 ..... ..... @qrrr_h
FRSQRTS_v 0.00 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd
### Advanced SIMD scalar x indexed element
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h

View File

@ -5035,6 +5035,20 @@ static const FPScalar f_scalar_fabd = {
};
TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
/*
 * Helper table for the scalar FRECPS pattern.  The recpsf helpers are
 * listed in f16, f32, f64 order; the TRANS() line below hands this
 * table to do_fp3_scalar for the FRECPS_s decode pattern.
 */
static const FPScalar f_scalar_frecps = {
gen_helper_recpsf_f16,
gen_helper_recpsf_f32,
gen_helper_recpsf_f64,
};
TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
/*
 * Helper table for the scalar FRSQRTS pattern.  The rsqrtsf helpers are
 * listed in f16, f32, f64 order; the TRANS() line below hands this
 * table to do_fp3_scalar for the FRSQRTS_s decode pattern.
 */
static const FPScalar f_scalar_frsqrts = {
gen_helper_rsqrtsf_f16,
gen_helper_rsqrtsf_f32,
gen_helper_rsqrtsf_f64,
};
TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
gen_helper_gvec_3_ptr * const fns[3])
{
@ -5182,6 +5196,20 @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
};
TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd)
/*
 * Helper table for the vector FRECPS pattern.  The gvec recps helpers
 * are listed in _h, _s, _d order; the TRANS() line below hands this
 * array to do_fp3_vector for the FRECPS_v decode pattern.
 */
static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
gen_helper_gvec_recps_h,
gen_helper_gvec_recps_s,
gen_helper_gvec_recps_d,
};
TRANS(FRECPS_v, do_fp3_vector, a, f_vector_frecps)
/*
 * Helper table for the vector FRSQRTS pattern.  The gvec rsqrts helpers
 * are listed in _h, _s, _d order; the TRANS() line below hands this
 * array to do_fp3_vector for the FRSQRTS_v decode pattern.
 */
static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
gen_helper_gvec_rsqrts_h,
gen_helper_gvec_rsqrts_s,
gen_helper_gvec_rsqrts_d,
};
TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts)
/*
* Advanced SIMD scalar/vector x indexed element
*/
@ -9308,107 +9336,6 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
}
}
/*
 * Generate code for the three-same floating-point operations that are
 * still dispatched through this function: FRECPS (fpopcode 0x1f) and
 * FRSQRTS (fpopcode 0x3f).  It serves both the scalar and the vector
 * encodings; the caller must already have rejected any encoding that
 * is not allocated for the form it is decoding.
 *
 * @size:     non-zero selects 64-bit elements, zero selects 32-bit ones
 * @elements: number of element passes to generate
 * @fpopcode: operation selector; any value other than 0x1f or 0x3f
 *            (FMAXNM, FMLA, FADD, FMULX, FCMEQ, FMAX, FMINNM, FMLS,
 *            FSUB, FMIN, FMUL, FCMGE, FACGE, FDIV, FABD, FCMGT, FACGT
 *            and everything else) hits g_assert_not_reached()
 * @rd:       destination vector register number
 * @rn, @rm:  source vector register numbers
 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
    bool is_double = size != 0;
    int total_bytes;
    int idx;

    for (idx = 0; idx < elements; idx++) {
        if (is_double) {
            TCGv_i64 lhs = tcg_temp_new_i64();
            TCGv_i64 rhs = tcg_temp_new_i64();
            TCGv_i64 result = tcg_temp_new_i64();

            read_vec_element(s, lhs, rn, idx, MO_64);
            read_vec_element(s, rhs, rm, idx, MO_64);

            if (fpopcode == 0x1f) {
                /* FRECPS */
                gen_helper_recpsf_f64(result, lhs, rhs, fpst);
            } else if (fpopcode == 0x3f) {
                /* FRSQRTS */
                gen_helper_rsqrtsf_f64(result, lhs, rhs, fpst);
            } else {
                g_assert_not_reached();
            }

            write_vec_element(s, result, rd, idx, MO_64);
            continue;
        }

        /* Single precision */
        {
            TCGv_i32 lhs = tcg_temp_new_i32();
            TCGv_i32 rhs = tcg_temp_new_i32();
            TCGv_i32 result = tcg_temp_new_i32();

            read_vec_element_i32(s, lhs, rn, idx, MO_32);
            read_vec_element_i32(s, rhs, rm, idx, MO_32);

            if (fpopcode == 0x1f) {
                /* FRECPS */
                gen_helper_recpsf_f32(result, lhs, rhs, fpst);
            } else if (fpopcode == 0x3f) {
                /* FRSQRTS */
                gen_helper_rsqrtsf_f32(result, lhs, rhs, fpst);
            } else {
                g_assert_not_reached();
            }

            if (elements != 1) {
                write_vec_element_i32(s, result, rd, idx, MO_32);
            } else {
                /* Scalar single: widen to 64 bits so the high part is zero. */
                TCGv_i64 widened = tcg_temp_new_i64();

                tcg_gen_extu_i32_i64(widened, result);
                write_vec_element(s, widened, rd, idx, MO_64);
            }
        }
    }

    /* The flag passed to clear_vec_high is set when more than 8 bytes were written. */
    total_bytes = elements * (is_double ? 8 : 4);
    clear_vec_high(s, total_bytes > 8, rd);
}
/* AdvSIMD scalar three same
* 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
* +-----+---+-----------+------+---+------+--------+---+------+------+
@ -9425,33 +9352,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
bool u = extract32(insn, 29, 1);
TCGv_i64 tcg_rd;
if (opcode >= 0x18) {
/* Floating point: U, size[1] and opcode indicate operation */
int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
switch (fpopcode) {
case 0x1f: /* FRECPS */
case 0x3f: /* FRSQRTS */
break;
default:
case 0x1b: /* FMULX */
case 0x5d: /* FACGE */
case 0x7d: /* FACGT */
case 0x1c: /* FCMEQ */
case 0x5c: /* FCMGE */
case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
unallocated_encoding(s);
return;
}
if (!fp_access_check(s)) {
return;
}
handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
return;
}
switch (opcode) {
case 0x1: /* SQADD, UQADD */
case 0x5: /* SQSUB, UQSUB */
@ -9568,80 +9468,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
write_fp_dreg(s, rd, tcg_rd);
}
/* AdvSIMD scalar three same FP16
 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
 *
 * Only FRECPS and FRSQRTS are translated here; every other fpopcode
 * value is reported as an unallocated encoding.  The instruction is
 * likewise rejected when the aa64_fp16 ISA feature is not present.
 */
static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
                                                  uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 3);
    int rm = extract32(insn, 16, 5);
    bool u = extract32(insn, 29, 1);
    bool a = extract32(insn, 23, 1);
    /* Fold the 'a' and 'U' bits into the opcode to get a single selector. */
    int fpopcode = opcode | (a << 3) | (u << 4);
    TCGv_ptr fpst;
    TCGv_i32 tcg_op1;
    TCGv_i32 tcg_op2;
    TCGv_i32 tcg_res;

    switch (fpopcode) {
    case 0x07: /* FRECPS */
    case 0x0f: /* FRSQRTS */
        break;
    default:
    case 0x03: /* FMULX */
    case 0x04: /* FCMEQ (reg) */
    case 0x14: /* FCMGE (reg) */
    case 0x15: /* FACGE */
    case 0x1a: /* FABD */
    case 0x1c: /* FCMGT (reg) */
    case 0x1d: /* FACGT */
        unallocated_encoding(s);
        return;
    }

    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        /*
         * Stop here: without the return we would fall through and still
         * emit the FP16 operation for an instruction that has just been
         * flagged as unallocated.
         */
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);

    tcg_op1 = read_fp_hreg(s, rn);
    tcg_op2 = read_fp_hreg(s, rm);
    tcg_res = tcg_temp_new_i32();

    switch (fpopcode) {
    case 0x07: /* FRECPS */
        gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x0f: /* FRSQRTS */
        gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    default:
    case 0x03: /* FMULX */
    case 0x04: /* FCMEQ (reg) */
    case 0x14: /* FCMGE (reg) */
    case 0x15: /* FACGE */
    case 0x1a: /* FABD */
    case 0x1c: /* FCMGT (reg) */
    case 0x1d: /* FACGT */
        /* Filtered out by the first switch, so never reached here. */
        g_assert_not_reached();
    }

    write_fp_sreg(s, rd, tcg_res);
}
/* AdvSIMD scalar three same extra
* 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
* +-----+---+-----------+------+---+------+---+--------+---+----+----+
@ -11114,7 +10940,7 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
/* Pairwise op subgroup of C3.6.16.
*
* This is called directly for float pairwise
* operations where the opcode and size are calculated differently.
*/
static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
@ -11271,10 +11097,6 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
int datasize = is_q ? 128 : 64;
int esize = 32 << size;
int elements = datasize / esize;
if (size == 1 && !is_q) {
unallocated_encoding(s);
return;
@ -11293,13 +11115,6 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
rn, rm, rd);
return;
case 0x1f: /* FRECPS */
case 0x3f: /* FRSQRTS */
if (!fp_access_check(s)) {
return;
}
handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
return;
case 0x1d: /* FMLAL */
case 0x3d: /* FMLSL */
@ -11328,10 +11143,12 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
case 0x1b: /* FMULX */
case 0x1c: /* FCMEQ */
case 0x1e: /* FMAX */
case 0x1f: /* FRECPS */
case 0x38: /* FMINNM */
case 0x39: /* FMLS */
case 0x3a: /* FSUB */
case 0x3e: /* FMIN */
case 0x3f: /* FRSQRTS */
case 0x5b: /* FMUL */
case 0x5c: /* FCMGE */
case 0x5d: /* FACGE */
@ -11673,17 +11490,11 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
* together indicate the operation.
*/
int fpopcode = opcode | (a << 3) | (u << 4);
int datasize = is_q ? 128 : 64;
int elements = datasize / 16;
bool pairwise;
TCGv_ptr fpst;
int pass;
switch (fpopcode) {
case 0x7: /* FRECPS */
case 0xf: /* FRSQRTS */
pairwise = false;
break;
case 0x10: /* FMAXNMP */
case 0x12: /* FADDP */
case 0x16: /* FMAXP */
@ -11698,10 +11509,12 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0x3: /* FMULX */
case 0x4: /* FCMEQ */
case 0x6: /* FMAX */
case 0x7: /* FRECPS */
case 0x8: /* FMINNM */
case 0x9: /* FMLS */
case 0xa: /* FSUB */
case 0xe: /* FMIN */
case 0xf: /* FRSQRTS */
case 0x13: /* FMUL */
case 0x14: /* FCMGE */
case 0x15: /* FACGE */
@ -11765,44 +11578,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
}
} else {
for (pass = 0; pass < elements; pass++) {
TCGv_i32 tcg_op1 = tcg_temp_new_i32();
TCGv_i32 tcg_op2 = tcg_temp_new_i32();
TCGv_i32 tcg_res = tcg_temp_new_i32();
read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
switch (fpopcode) {
case 0x7: /* FRECPS */
gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0xf: /* FRSQRTS */
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
default:
case 0x0: /* FMAXNM */
case 0x1: /* FMLA */
case 0x2: /* FADD */
case 0x3: /* FMULX */
case 0x4: /* FCMEQ */
case 0x6: /* FMAX */
case 0x8: /* FMINNM */
case 0x9: /* FMLS */
case 0xa: /* FSUB */
case 0xe: /* FMIN */
case 0x13: /* FMUL */
case 0x14: /* FCMGE */
case 0x15: /* FACGE */
case 0x17: /* FDIV */
case 0x1a: /* FABD */
case 0x1c: /* FCMGT */
case 0x1d: /* FACGT */
g_assert_not_reached();
}
write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
}
g_assert_not_reached();
}
clear_vec_high(s, is_q, rd);
@ -13572,7 +13348,6 @@ static const AArch64DecodeTable data_proc_simd[] = {
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
{ 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
{ 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
{ 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
{ 0x00000000, 0x00000000, NULL }
};