mirror of https://github.com/xemu-project/xemu.git
target/arm: Convert FRECPS, FRSQRTS to decodetree
These are the last instructions within handle_3same_float and disas_simd_scalar_three_reg_same_fp16 so remove them. Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20240524232121.284515-28-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
43454734c4
commit
13db93bce5
|
@ -731,6 +731,12 @@ FACGT_s 0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd
|
|||
FABD_s 0111 1110 110 ..... 00010 1 ..... ..... @rrr_h
|
||||
FABD_s 0111 1110 1.1 ..... 11010 1 ..... ..... @rrr_sd
|
||||
|
||||
FRECPS_s 0101 1110 010 ..... 00111 1 ..... ..... @rrr_h
|
||||
FRECPS_s 0101 1110 0.1 ..... 11111 1 ..... ..... @rrr_sd
|
||||
|
||||
FRSQRTS_s 0101 1110 110 ..... 00111 1 ..... ..... @rrr_h
|
||||
FRSQRTS_s 0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd
|
||||
|
||||
### Advanced SIMD three same
|
||||
|
||||
FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
|
||||
|
@ -784,6 +790,12 @@ FACGT_v 0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd
|
|||
FABD_v 0.10 1110 110 ..... 00010 1 ..... ..... @qrrr_h
|
||||
FABD_v 0.10 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd
|
||||
|
||||
FRECPS_v 0.00 1110 010 ..... 00111 1 ..... ..... @qrrr_h
|
||||
FRECPS_v 0.00 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd
|
||||
|
||||
FRSQRTS_v 0.00 1110 110 ..... 00111 1 ..... ..... @qrrr_h
|
||||
FRSQRTS_v 0.00 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd
|
||||
|
||||
### Advanced SIMD scalar x indexed element
|
||||
|
||||
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
|
||||
|
|
|
@ -5035,6 +5035,20 @@ static const FPScalar f_scalar_fabd = {
|
|||
};
|
||||
TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
|
||||
|
||||
static const FPScalar f_scalar_frecps = {
|
||||
gen_helper_recpsf_f16,
|
||||
gen_helper_recpsf_f32,
|
||||
gen_helper_recpsf_f64,
|
||||
};
|
||||
TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
|
||||
|
||||
static const FPScalar f_scalar_frsqrts = {
|
||||
gen_helper_rsqrtsf_f16,
|
||||
gen_helper_rsqrtsf_f32,
|
||||
gen_helper_rsqrtsf_f64,
|
||||
};
|
||||
TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
|
||||
|
||||
static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
|
||||
gen_helper_gvec_3_ptr * const fns[3])
|
||||
{
|
||||
|
@ -5182,6 +5196,20 @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
|
|||
};
|
||||
TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd)
|
||||
|
||||
static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
|
||||
gen_helper_gvec_recps_h,
|
||||
gen_helper_gvec_recps_s,
|
||||
gen_helper_gvec_recps_d,
|
||||
};
|
||||
TRANS(FRECPS_v, do_fp3_vector, a, f_vector_frecps)
|
||||
|
||||
static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
|
||||
gen_helper_gvec_rsqrts_h,
|
||||
gen_helper_gvec_rsqrts_s,
|
||||
gen_helper_gvec_rsqrts_d,
|
||||
};
|
||||
TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts)
|
||||
|
||||
/*
|
||||
* Advanced SIMD scalar/vector x indexed element
|
||||
*/
|
||||
|
@ -9308,107 +9336,6 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
|
|||
}
|
||||
}
|
||||
|
||||
/* Handle the 3-same-operands float operations; shared by the scalar
|
||||
* and vector encodings. The caller must filter out any encodings
|
||||
* not allocated for the encoding it is dealing with.
|
||||
*/
|
||||
static void handle_3same_float(DisasContext *s, int size, int elements,
|
||||
int fpopcode, int rd, int rn, int rm)
|
||||
{
|
||||
int pass;
|
||||
TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
|
||||
|
||||
for (pass = 0; pass < elements; pass++) {
|
||||
if (size) {
|
||||
/* Double */
|
||||
TCGv_i64 tcg_op1 = tcg_temp_new_i64();
|
||||
TCGv_i64 tcg_op2 = tcg_temp_new_i64();
|
||||
TCGv_i64 tcg_res = tcg_temp_new_i64();
|
||||
|
||||
read_vec_element(s, tcg_op1, rn, pass, MO_64);
|
||||
read_vec_element(s, tcg_op2, rm, pass, MO_64);
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x1f: /* FRECPS */
|
||||
gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x3f: /* FRSQRTS */
|
||||
gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
default:
|
||||
case 0x18: /* FMAXNM */
|
||||
case 0x19: /* FMLA */
|
||||
case 0x1a: /* FADD */
|
||||
case 0x1b: /* FMULX */
|
||||
case 0x1c: /* FCMEQ */
|
||||
case 0x1e: /* FMAX */
|
||||
case 0x38: /* FMINNM */
|
||||
case 0x39: /* FMLS */
|
||||
case 0x3a: /* FSUB */
|
||||
case 0x3e: /* FMIN */
|
||||
case 0x5b: /* FMUL */
|
||||
case 0x5c: /* FCMGE */
|
||||
case 0x5d: /* FACGE */
|
||||
case 0x5f: /* FDIV */
|
||||
case 0x7a: /* FABD */
|
||||
case 0x7c: /* FCMGT */
|
||||
case 0x7d: /* FACGT */
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
write_vec_element(s, tcg_res, rd, pass, MO_64);
|
||||
} else {
|
||||
/* Single */
|
||||
TCGv_i32 tcg_op1 = tcg_temp_new_i32();
|
||||
TCGv_i32 tcg_op2 = tcg_temp_new_i32();
|
||||
TCGv_i32 tcg_res = tcg_temp_new_i32();
|
||||
|
||||
read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
|
||||
read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x1f: /* FRECPS */
|
||||
gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x3f: /* FRSQRTS */
|
||||
gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
default:
|
||||
case 0x18: /* FMAXNM */
|
||||
case 0x19: /* FMLA */
|
||||
case 0x1a: /* FADD */
|
||||
case 0x1b: /* FMULX */
|
||||
case 0x1c: /* FCMEQ */
|
||||
case 0x1e: /* FMAX */
|
||||
case 0x38: /* FMINNM */
|
||||
case 0x39: /* FMLS */
|
||||
case 0x3a: /* FSUB */
|
||||
case 0x3e: /* FMIN */
|
||||
case 0x5b: /* FMUL */
|
||||
case 0x5c: /* FCMGE */
|
||||
case 0x5d: /* FACGE */
|
||||
case 0x5f: /* FDIV */
|
||||
case 0x7a: /* FABD */
|
||||
case 0x7c: /* FCMGT */
|
||||
case 0x7d: /* FACGT */
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
if (elements == 1) {
|
||||
/* scalar single so clear high part */
|
||||
TCGv_i64 tcg_tmp = tcg_temp_new_i64();
|
||||
|
||||
tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
|
||||
write_vec_element(s, tcg_tmp, rd, pass, MO_64);
|
||||
} else {
|
||||
write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
|
||||
}
|
||||
|
||||
/* AdvSIMD scalar three same
|
||||
* 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
|
||||
* +-----+---+-----------+------+---+------+--------+---+------+------+
|
||||
|
@ -9425,33 +9352,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
|
|||
bool u = extract32(insn, 29, 1);
|
||||
TCGv_i64 tcg_rd;
|
||||
|
||||
if (opcode >= 0x18) {
|
||||
/* Floating point: U, size[1] and opcode indicate operation */
|
||||
int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
|
||||
switch (fpopcode) {
|
||||
case 0x1f: /* FRECPS */
|
||||
case 0x3f: /* FRSQRTS */
|
||||
break;
|
||||
default:
|
||||
case 0x1b: /* FMULX */
|
||||
case 0x5d: /* FACGE */
|
||||
case 0x7d: /* FACGT */
|
||||
case 0x1c: /* FCMEQ */
|
||||
case 0x5c: /* FCMGE */
|
||||
case 0x7a: /* FABD */
|
||||
case 0x7c: /* FCMGT */
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!fp_access_check(s)) {
|
||||
return;
|
||||
}
|
||||
|
||||
handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (opcode) {
|
||||
case 0x1: /* SQADD, UQADD */
|
||||
case 0x5: /* SQSUB, UQSUB */
|
||||
|
@ -9568,80 +9468,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
|
|||
write_fp_dreg(s, rd, tcg_rd);
|
||||
}
|
||||
|
||||
/* AdvSIMD scalar three same FP16
|
||||
* 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
|
||||
* +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
|
||||
* | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
|
||||
* +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
|
||||
* v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
|
||||
* m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
|
||||
*/
|
||||
static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
|
||||
uint32_t insn)
|
||||
{
|
||||
int rd = extract32(insn, 0, 5);
|
||||
int rn = extract32(insn, 5, 5);
|
||||
int opcode = extract32(insn, 11, 3);
|
||||
int rm = extract32(insn, 16, 5);
|
||||
bool u = extract32(insn, 29, 1);
|
||||
bool a = extract32(insn, 23, 1);
|
||||
int fpopcode = opcode | (a << 3) | (u << 4);
|
||||
TCGv_ptr fpst;
|
||||
TCGv_i32 tcg_op1;
|
||||
TCGv_i32 tcg_op2;
|
||||
TCGv_i32 tcg_res;
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x07: /* FRECPS */
|
||||
case 0x0f: /* FRSQRTS */
|
||||
break;
|
||||
default:
|
||||
case 0x03: /* FMULX */
|
||||
case 0x04: /* FCMEQ (reg) */
|
||||
case 0x14: /* FCMGE (reg) */
|
||||
case 0x15: /* FACGE */
|
||||
case 0x1a: /* FABD */
|
||||
case 0x1c: /* FCMGT (reg) */
|
||||
case 0x1d: /* FACGT */
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!dc_isar_feature(aa64_fp16, s)) {
|
||||
unallocated_encoding(s);
|
||||
}
|
||||
|
||||
if (!fp_access_check(s)) {
|
||||
return;
|
||||
}
|
||||
|
||||
fpst = fpstatus_ptr(FPST_FPCR_F16);
|
||||
|
||||
tcg_op1 = read_fp_hreg(s, rn);
|
||||
tcg_op2 = read_fp_hreg(s, rm);
|
||||
tcg_res = tcg_temp_new_i32();
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x07: /* FRECPS */
|
||||
gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x0f: /* FRSQRTS */
|
||||
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
default:
|
||||
case 0x03: /* FMULX */
|
||||
case 0x04: /* FCMEQ (reg) */
|
||||
case 0x14: /* FCMGE (reg) */
|
||||
case 0x15: /* FACGE */
|
||||
case 0x1a: /* FABD */
|
||||
case 0x1c: /* FCMGT (reg) */
|
||||
case 0x1d: /* FACGT */
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
write_fp_sreg(s, rd, tcg_res);
|
||||
}
|
||||
|
||||
/* AdvSIMD scalar three same extra
|
||||
* 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
|
||||
* +-----+---+-----------+------+---+------+---+--------+---+----+----+
|
||||
|
@ -11114,7 +10940,7 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
|
|||
|
||||
/* Pairwise op subgroup of C3.6.16.
|
||||
*
|
||||
* This is called directly or via the handle_3same_float for float pairwise
|
||||
* This is called directly for float pairwise
|
||||
* operations where the opcode and size are calculated differently.
|
||||
*/
|
||||
static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
|
||||
|
@ -11271,10 +11097,6 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
|
|||
int rn = extract32(insn, 5, 5);
|
||||
int rd = extract32(insn, 0, 5);
|
||||
|
||||
int datasize = is_q ? 128 : 64;
|
||||
int esize = 32 << size;
|
||||
int elements = datasize / esize;
|
||||
|
||||
if (size == 1 && !is_q) {
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
|
@ -11293,13 +11115,6 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
|
|||
handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
|
||||
rn, rm, rd);
|
||||
return;
|
||||
case 0x1f: /* FRECPS */
|
||||
case 0x3f: /* FRSQRTS */
|
||||
if (!fp_access_check(s)) {
|
||||
return;
|
||||
}
|
||||
handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
|
||||
return;
|
||||
|
||||
case 0x1d: /* FMLAL */
|
||||
case 0x3d: /* FMLSL */
|
||||
|
@ -11328,10 +11143,12 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
|
|||
case 0x1b: /* FMULX */
|
||||
case 0x1c: /* FCMEQ */
|
||||
case 0x1e: /* FMAX */
|
||||
case 0x1f: /* FRECPS */
|
||||
case 0x38: /* FMINNM */
|
||||
case 0x39: /* FMLS */
|
||||
case 0x3a: /* FSUB */
|
||||
case 0x3e: /* FMIN */
|
||||
case 0x3f: /* FRSQRTS */
|
||||
case 0x5b: /* FMUL */
|
||||
case 0x5c: /* FCMGE */
|
||||
case 0x5d: /* FACGE */
|
||||
|
@ -11673,17 +11490,11 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
|
|||
* together indicate the operation.
|
||||
*/
|
||||
int fpopcode = opcode | (a << 3) | (u << 4);
|
||||
int datasize = is_q ? 128 : 64;
|
||||
int elements = datasize / 16;
|
||||
bool pairwise;
|
||||
TCGv_ptr fpst;
|
||||
int pass;
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x7: /* FRECPS */
|
||||
case 0xf: /* FRSQRTS */
|
||||
pairwise = false;
|
||||
break;
|
||||
case 0x10: /* FMAXNMP */
|
||||
case 0x12: /* FADDP */
|
||||
case 0x16: /* FMAXP */
|
||||
|
@ -11698,10 +11509,12 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
|
|||
case 0x3: /* FMULX */
|
||||
case 0x4: /* FCMEQ */
|
||||
case 0x6: /* FMAX */
|
||||
case 0x7: /* FRECPS */
|
||||
case 0x8: /* FMINNM */
|
||||
case 0x9: /* FMLS */
|
||||
case 0xa: /* FSUB */
|
||||
case 0xe: /* FMIN */
|
||||
case 0xf: /* FRSQRTS */
|
||||
case 0x13: /* FMUL */
|
||||
case 0x14: /* FCMGE */
|
||||
case 0x15: /* FACGE */
|
||||
|
@ -11765,44 +11578,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
|
|||
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
|
||||
}
|
||||
} else {
|
||||
for (pass = 0; pass < elements; pass++) {
|
||||
TCGv_i32 tcg_op1 = tcg_temp_new_i32();
|
||||
TCGv_i32 tcg_op2 = tcg_temp_new_i32();
|
||||
TCGv_i32 tcg_res = tcg_temp_new_i32();
|
||||
|
||||
read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
|
||||
read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x7: /* FRECPS */
|
||||
gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0xf: /* FRSQRTS */
|
||||
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
default:
|
||||
case 0x0: /* FMAXNM */
|
||||
case 0x1: /* FMLA */
|
||||
case 0x2: /* FADD */
|
||||
case 0x3: /* FMULX */
|
||||
case 0x4: /* FCMEQ */
|
||||
case 0x6: /* FMAX */
|
||||
case 0x8: /* FMINNM */
|
||||
case 0x9: /* FMLS */
|
||||
case 0xa: /* FSUB */
|
||||
case 0xe: /* FMIN */
|
||||
case 0x13: /* FMUL */
|
||||
case 0x14: /* FCMGE */
|
||||
case 0x15: /* FACGE */
|
||||
case 0x17: /* FDIV */
|
||||
case 0x1a: /* FABD */
|
||||
case 0x1c: /* FCMGT */
|
||||
case 0x1d: /* FACGT */
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
|
||||
}
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
clear_vec_high(s, is_q, rd);
|
||||
|
@ -13572,7 +13348,6 @@ static const AArch64DecodeTable data_proc_simd[] = {
|
|||
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
|
||||
{ 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
|
||||
{ 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
|
||||
{ 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
|
||||
{ 0x00000000, 0x00000000, NULL }
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue