target/arm: Convert Neon load/store multiple structures to decodetree

Convert the Neon "load/store multiple structures" insns to decodetree. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200430181003.21682-12-peter.maydell@linaro.org
2020-04-30 19:09:38 +01:00 · 2020-04-30 19:09:38 +01:00 · a27b463043
parent d27e82f7d0
commit a27b463043
3 changed files with 133 additions and 89 deletions
--- a/target/arm/neon-ls.decode
+++ b/target/arm/neon-ls.decode
@ -27,3 +27,10 @@
 #   0b1111_1001_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
 # This file works on the A32 encoding only; calling code for T32 has to
 # transform the insn into the A32 version first.
 %vd_dp  22:1 12:4
 # Neon load/store multiple structures
 VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
               vd=%vd_dp
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@ -274,3 +274,127 @@ static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
 }
 static struct {
    int nregs;
    int interleave;
    int spacing;
 } const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
 };
 static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
                                      int stride)
 {
    if (rm != 15) {
        TCGv_i32 base;
        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            tcg_temp_free_i32(index);
        }
        store_reg(s, rn, base);
    }
 }
 static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
 {
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp endian = s->be_data;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr, tmp;
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }
    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }
    if (!vfp_access_check(s)) {
        return true;
    }
    /* For our purposes, bytes are always little-endian.  */
    if (size == 0) {
        endian = MO_LE;
    }
    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        size = 3;
    }
    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    tmp = tcg_const_i32(1 << size);
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                int tt = a->vd + reg + spacing * xs;
                if (a->l) {
                    gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
                }
                tcg_gen_add_i32(addr, addr, tmp);
            }
        }
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(tmp64);
    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
 }
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@ -3214,45 +3214,19 @@ static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
 }
 static struct {
    int nregs;
    int interleave;
    int spacing;
 } const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
 };
 /* Translate a NEON load/store element instruction.  Return nonzero if the
   instruction is invalid.  */
 static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
 {
    int rd, rn, rm;
    int op;
    int nregs;
    int interleave;
    int spacing;
    int stride;
    int size;
    int reg;
    int load;
    int n;
    int vec_size;
    int mmu_idx;
    MemOp endian;
    TCGv_i32 addr;
    TCGv_i32 tmp;
    TCGv_i32 tmp2;
    TCGv_i64 tmp64;
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return 1;
@ -3274,70 +3248,9 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
    rn = (insn >> 16) & 0xf;
    rm = insn & 0xf;
    load = (insn & (1 << 21)) != 0;
    endian = s->be_data;
    mmu_idx = get_mem_index(s);
    if ((insn & (1 << 23)) == 0) {
-        /* Load store all elements.  */
+        /* Load store all elements -- handled already by decodetree */
-        op = (insn >> 8) & 0xf;
+        return 1;
        size = (insn >> 6) & 3;
        if (op > 10)
            return 1;
        /* Catch UNDEF cases for bad values of align field */
        switch (op & 0xc) {
        case 4:
            if (((insn >> 5) & 1) == 1) {
                return 1;
            }
            break;
        case 8:
            if (((insn >> 4) & 3) == 3) {
                return 1;
            }
            break;
        default:
            break;
        }
        nregs = neon_ls_element_type[op].nregs;
        interleave = neon_ls_element_type[op].interleave;
        spacing = neon_ls_element_type[op].spacing;
        if (size == 3 && (interleave | spacing) != 1) {
            return 1;
        }
        /* For our purposes, bytes are always little-endian.  */
        if (size == 0) {
            endian = MO_LE;
        }
        /* Consecutive little-endian elements from a single register
         * can be promoted to a larger little-endian operation.
         */
        if (interleave == 1 && endian == MO_LE) {
            size = 3;
        }
        tmp64 = tcg_temp_new_i64();
        addr = tcg_temp_new_i32();
        tmp2 = tcg_const_i32(1 << size);
        load_reg_var(s, addr, rn);
        for (reg = 0; reg < nregs; reg++) {
            for (n = 0; n < 8 >> size; n++) {
                int xs;
                for (xs = 0; xs < interleave; xs++) {
                    int tt = rd + reg + spacing * xs;
                    if (load) {
                        gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
                        neon_store_element64(tt, n, size, tmp64);
                    } else {
                        neon_load_element64(tmp64, tt, n, size);
                        gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
                    }
                    tcg_gen_add_i32(addr, addr, tmp2);
                }
            }
        }
        tcg_temp_free_i32(addr);
        tcg_temp_free_i32(tmp2);
        tcg_temp_free_i64(tmp64);
        stride = nregs * interleave * 8;
    } else {
        size = (insn >> 10) & 3;
        if (size == 3) {