mirror of https://github.com/xemu-project/xemu.git
target/arm: Convert load/store (multiple structures) to decodetree
Convert the instructions in the ASIMD load/store multiple structures
instruction classes to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230602155223.2040685-19-peter.maydell@linaro.org
This commit is contained in:
parent
2521b6073b
commit
e25ba1fa0b
|
@ -474,3 +474,23 @@ LDAPR_i 01 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext
|
||||||
LDAPR_i 10 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=2
|
LDAPR_i 10 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=2
|
||||||
LDAPR_i 00 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=0
|
LDAPR_i 00 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=0
|
||||||
LDAPR_i 01 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=1
|
LDAPR_i 01 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=1
|
||||||
|
|
||||||
|
# Load/store multiple structures
|
||||||
|
# The 4-bit opcode in [15:12] encodes repeat count and structure elements
|
||||||
|
&ldst_mult rm rn rt sz q p rpt selem
|
||||||
|
@ldst_mult . q:1 ...... p:1 . . rm:5 .... sz:2 rn:5 rt:5 &ldst_mult
|
||||||
|
ST_mult 0 . 001100 . 0 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4
|
||||||
|
ST_mult 0 . 001100 . 0 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1
|
||||||
|
ST_mult 0 . 001100 . 0 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3
|
||||||
|
ST_mult 0 . 001100 . 0 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1
|
||||||
|
ST_mult 0 . 001100 . 0 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1
|
||||||
|
ST_mult 0 . 001100 . 0 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2
|
||||||
|
ST_mult 0 . 001100 . 0 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1
|
||||||
|
|
||||||
|
LD_mult 0 . 001100 . 1 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4
|
||||||
|
LD_mult 0 . 001100 . 1 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1
|
||||||
|
LD_mult 0 . 001100 . 1 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3
|
||||||
|
LD_mult 0 . 001100 . 1 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1
|
||||||
|
LD_mult 0 . 001100 . 1 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1
|
||||||
|
LD_mult 0 . 001100 . 1 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2
|
||||||
|
LD_mult 0 . 001100 . 1 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1
|
||||||
|
|
|
@ -3414,99 +3414,28 @@ static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* AdvSIMD load/store multiple structures
|
static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
|
||||||
*
|
|
||||||
* 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
|
|
||||||
* +---+---+---------------+---+-------------+--------+------+------+------+
|
|
||||||
* | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
|
|
||||||
* +---+---+---------------+---+-------------+--------+------+------+------+
|
|
||||||
*
|
|
||||||
* AdvSIMD load/store multiple structures (post-indexed)
|
|
||||||
*
|
|
||||||
* 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
|
|
||||||
* +---+---+---------------+---+---+---------+--------+------+------+------+
|
|
||||||
* | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
|
|
||||||
* +---+---+---------------+---+---+---------+--------+------+------+------+
|
|
||||||
*
|
|
||||||
* Rt: first (or only) SIMD&FP register to be transferred
|
|
||||||
* Rn: base address or SP
|
|
||||||
* Rm (post-index only): post-index register (when !31) or size dependent #imm
|
|
||||||
*/
|
|
||||||
static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
|
|
||||||
{
|
{
|
||||||
int rt = extract32(insn, 0, 5);
|
|
||||||
int rn = extract32(insn, 5, 5);
|
|
||||||
int rm = extract32(insn, 16, 5);
|
|
||||||
int size = extract32(insn, 10, 2);
|
|
||||||
int opcode = extract32(insn, 12, 4);
|
|
||||||
bool is_store = !extract32(insn, 22, 1);
|
|
||||||
bool is_postidx = extract32(insn, 23, 1);
|
|
||||||
bool is_q = extract32(insn, 30, 1);
|
|
||||||
TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
|
TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
|
||||||
MemOp endian, align, mop;
|
MemOp endian, align, mop;
|
||||||
|
|
||||||
int total; /* total bytes */
|
int total; /* total bytes */
|
||||||
int elements; /* elements per vector */
|
int elements; /* elements per vector */
|
||||||
int rpt; /* num iterations */
|
|
||||||
int selem; /* structure elements */
|
|
||||||
int r;
|
int r;
|
||||||
|
int size = a->sz;
|
||||||
|
|
||||||
if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
|
if (!a->p && a->rm != 0) {
|
||||||
unallocated_encoding(s);
|
/* For non-postindexed accesses the Rm field must be 0 */
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (size == 3 && !a->q && a->selem != 1) {
|
||||||
if (!is_postidx && rm != 0) {
|
return false;
|
||||||
unallocated_encoding(s);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* From the shared decode logic */
|
|
||||||
switch (opcode) {
|
|
||||||
case 0x0:
|
|
||||||
rpt = 1;
|
|
||||||
selem = 4;
|
|
||||||
break;
|
|
||||||
case 0x2:
|
|
||||||
rpt = 4;
|
|
||||||
selem = 1;
|
|
||||||
break;
|
|
||||||
case 0x4:
|
|
||||||
rpt = 1;
|
|
||||||
selem = 3;
|
|
||||||
break;
|
|
||||||
case 0x6:
|
|
||||||
rpt = 3;
|
|
||||||
selem = 1;
|
|
||||||
break;
|
|
||||||
case 0x7:
|
|
||||||
rpt = 1;
|
|
||||||
selem = 1;
|
|
||||||
break;
|
|
||||||
case 0x8:
|
|
||||||
rpt = 1;
|
|
||||||
selem = 2;
|
|
||||||
break;
|
|
||||||
case 0xa:
|
|
||||||
rpt = 2;
|
|
||||||
selem = 1;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unallocated_encoding(s);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (size == 3 && !is_q && selem != 1) {
|
|
||||||
/* reserved */
|
|
||||||
unallocated_encoding(s);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!fp_access_check(s)) {
|
if (!fp_access_check(s)) {
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rn == 31) {
|
if (a->rn == 31) {
|
||||||
gen_check_sp_alignment(s);
|
gen_check_sp_alignment(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3516,22 +3445,22 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
|
||||||
endian = MO_LE;
|
endian = MO_LE;
|
||||||
}
|
}
|
||||||
|
|
||||||
total = rpt * selem * (is_q ? 16 : 8);
|
total = a->rpt * a->selem * (a->q ? 16 : 8);
|
||||||
tcg_rn = cpu_reg_sp(s, rn);
|
tcg_rn = cpu_reg_sp(s, a->rn);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Issue the MTE check vs the logical repeat count, before we
|
* Issue the MTE check vs the logical repeat count, before we
|
||||||
* promote consecutive little-endian elements below.
|
* promote consecutive little-endian elements below.
|
||||||
*/
|
*/
|
||||||
clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
|
clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
|
||||||
total, finalize_memop_asimd(s, size));
|
finalize_memop_asimd(s, size));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Consecutive little-endian elements from a single register
|
* Consecutive little-endian elements from a single register
|
||||||
* can be promoted to a larger little-endian operation.
|
* can be promoted to a larger little-endian operation.
|
||||||
*/
|
*/
|
||||||
align = MO_ALIGN;
|
align = MO_ALIGN;
|
||||||
if (selem == 1 && endian == MO_LE) {
|
if (a->selem == 1 && endian == MO_LE) {
|
||||||
align = pow2_align(size);
|
align = pow2_align(size);
|
||||||
size = 3;
|
size = 3;
|
||||||
}
|
}
|
||||||
|
@ -3540,45 +3469,119 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
|
||||||
}
|
}
|
||||||
mop = endian | size | align;
|
mop = endian | size | align;
|
||||||
|
|
||||||
elements = (is_q ? 16 : 8) >> size;
|
elements = (a->q ? 16 : 8) >> size;
|
||||||
tcg_ebytes = tcg_constant_i64(1 << size);
|
tcg_ebytes = tcg_constant_i64(1 << size);
|
||||||
for (r = 0; r < rpt; r++) {
|
for (r = 0; r < a->rpt; r++) {
|
||||||
int e;
|
int e;
|
||||||
for (e = 0; e < elements; e++) {
|
for (e = 0; e < elements; e++) {
|
||||||
int xs;
|
int xs;
|
||||||
for (xs = 0; xs < selem; xs++) {
|
for (xs = 0; xs < a->selem; xs++) {
|
||||||
int tt = (rt + r + xs) % 32;
|
int tt = (a->rt + r + xs) % 32;
|
||||||
if (is_store) {
|
|
||||||
do_vec_st(s, tt, e, clean_addr, mop);
|
|
||||||
} else {
|
|
||||||
do_vec_ld(s, tt, e, clean_addr, mop);
|
do_vec_ld(s, tt, e, clean_addr, mop);
|
||||||
}
|
|
||||||
tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
|
tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is_store) {
|
/*
|
||||||
/* For non-quad operations, setting a slice of the low
|
* For non-quad operations, setting a slice of the low 64 bits of
|
||||||
* 64 bits of the register clears the high 64 bits (in
|
* the register clears the high 64 bits (in the ARM ARM pseudocode
|
||||||
* the ARM ARM pseudocode this is implicit in the fact
|
* this is implicit in the fact that 'rval' is a 64 bit wide
|
||||||
* that 'rval' is a 64 bit wide variable).
|
* variable). For quad operations, we might still need to zero
|
||||||
* For quad operations, we might still need to zero the
|
* the high bits of SVE.
|
||||||
* high bits of SVE.
|
|
||||||
*/
|
*/
|
||||||
for (r = 0; r < rpt * selem; r++) {
|
for (r = 0; r < a->rpt * a->selem; r++) {
|
||||||
int tt = (rt + r) % 32;
|
int tt = (a->rt + r) % 32;
|
||||||
clear_vec_high(s, is_q, tt);
|
clear_vec_high(s, a->q, tt);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a->p) {
|
||||||
|
if (a->rm == 31) {
|
||||||
|
tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
|
||||||
|
} else {
|
||||||
|
tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
|
||||||
|
{
|
||||||
|
TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
|
||||||
|
MemOp endian, align, mop;
|
||||||
|
|
||||||
|
int total; /* total bytes */
|
||||||
|
int elements; /* elements per vector */
|
||||||
|
int r;
|
||||||
|
int size = a->sz;
|
||||||
|
|
||||||
|
if (!a->p && a->rm != 0) {
|
||||||
|
/* For non-postindexed accesses the Rm field must be 0 */
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (size == 3 && !a->q && a->selem != 1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!fp_access_check(s)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a->rn == 31) {
|
||||||
|
gen_check_sp_alignment(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For our purposes, bytes are always little-endian. */
|
||||||
|
endian = s->be_data;
|
||||||
|
if (size == 0) {
|
||||||
|
endian = MO_LE;
|
||||||
|
}
|
||||||
|
|
||||||
|
total = a->rpt * a->selem * (a->q ? 16 : 8);
|
||||||
|
tcg_rn = cpu_reg_sp(s, a->rn);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Issue the MTE check vs the logical repeat count, before we
|
||||||
|
* promote consecutive little-endian elements below.
|
||||||
|
*/
|
||||||
|
clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
|
||||||
|
finalize_memop_asimd(s, size));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Consecutive little-endian elements from a single register
|
||||||
|
* can be promoted to a larger little-endian operation.
|
||||||
|
*/
|
||||||
|
align = MO_ALIGN;
|
||||||
|
if (a->selem == 1 && endian == MO_LE) {
|
||||||
|
align = pow2_align(size);
|
||||||
|
size = 3;
|
||||||
|
}
|
||||||
|
if (!s->align_mem) {
|
||||||
|
align = 0;
|
||||||
|
}
|
||||||
|
mop = endian | size | align;
|
||||||
|
|
||||||
|
elements = (a->q ? 16 : 8) >> size;
|
||||||
|
tcg_ebytes = tcg_constant_i64(1 << size);
|
||||||
|
for (r = 0; r < a->rpt; r++) {
|
||||||
|
int e;
|
||||||
|
for (e = 0; e < elements; e++) {
|
||||||
|
int xs;
|
||||||
|
for (xs = 0; xs < a->selem; xs++) {
|
||||||
|
int tt = (a->rt + r + xs) % 32;
|
||||||
|
do_vec_st(s, tt, e, clean_addr, mop);
|
||||||
|
tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_postidx) {
|
if (a->p) {
|
||||||
if (rm == 31) {
|
if (a->rm == 31) {
|
||||||
tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
|
tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
|
||||||
} else {
|
} else {
|
||||||
tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
|
tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* AdvSIMD load/store single structure
|
/* AdvSIMD load/store single structure
|
||||||
|
@ -3931,9 +3934,6 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
|
||||||
static void disas_ldst(DisasContext *s, uint32_t insn)
|
static void disas_ldst(DisasContext *s, uint32_t insn)
|
||||||
{
|
{
|
||||||
switch (extract32(insn, 24, 6)) {
|
switch (extract32(insn, 24, 6)) {
|
||||||
case 0x0c: /* AdvSIMD load/store multiple structures */
|
|
||||||
disas_ldst_multiple_struct(s, insn);
|
|
||||||
break;
|
|
||||||
case 0x0d: /* AdvSIMD load/store single structure */
|
case 0x0d: /* AdvSIMD load/store single structure */
|
||||||
disas_ldst_single_struct(s, insn);
|
disas_ldst_single_struct(s, insn);
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue