target/arm: Convert atomic memory ops to decodetree

Convert the insns in the atomic memory operations group to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230602155223.2040685-16-peter.maydell@linaro.org

commit 54a9ab74ed
parent f36bf0c14a

@@ -442,3 +442,18 @@ STR_v sz:2 111 1 00 00 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0
 STR_v           00 111 1 00 10 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4
 LDR_v           sz:2 111 1 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0
 LDR_v           00 111 1 00 11 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4
+
+# Atomic memory operations
+&atomic         rs rn rt a r sz
+@atomic         sz:2 ... . .. a:1 r:1 . rs:5 . ... .. rn:5 rt:5 &atomic
+LDADD           .. 111 0 00 . . 1 ..... 0000 00 ..... ..... @atomic
+LDCLR           .. 111 0 00 . . 1 ..... 0001 00 ..... ..... @atomic
+LDEOR           .. 111 0 00 . . 1 ..... 0010 00 ..... ..... @atomic
+LDSET           .. 111 0 00 . . 1 ..... 0011 00 ..... ..... @atomic
+LDSMAX          .. 111 0 00 . . 1 ..... 0100 00 ..... ..... @atomic
+LDSMIN          .. 111 0 00 . . 1 ..... 0101 00 ..... ..... @atomic
+LDUMAX          .. 111 0 00 . . 1 ..... 0110 00 ..... ..... @atomic
+LDUMIN          .. 111 0 00 . . 1 ..... 0111 00 ..... ..... @atomic
+SWP             .. 111 0 00 . . 1 ..... 1000 00 ..... ..... @atomic
+
+LDAPR           sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5
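
The &atomic and @atomic lines above are decodetree syntax: &atomic declares the
argument set (for which the generator emits an arg_atomic struct) and @atomic
declares where each field sits in the 32-bit instruction word, so the generated
decoder can fill in the struct before calling the trans_* handler. As a
standalone illustration of that field layout (not QEMU code; the sample word is
hand-assembled from the LDADD pattern and worth double-checking against a real
assembler):

    #include <stdint.h>
    #include <stdio.h>

    /* Same contract as QEMU's extract32() helper. */
    static uint32_t extract32(uint32_t value, int start, int length)
    {
        return (value >> start) & (~0u >> (32 - length));
    }

    int main(void)
    {
        uint32_t insn = 0xF8210062; /* assumed encoding of LDADD X1, X2, [X3] */

        printf("sz=%u a=%u r=%u rs=%u rn=%u rt=%u\n",
               extract32(insn, 30, 2),  /* sz: insn[31:30] */
               extract32(insn, 23, 1),  /* a:  insn[23]    */
               extract32(insn, 22, 1),  /* r:  insn[22]    */
               extract32(insn, 16, 5),  /* rs: insn[20:16] */
               extract32(insn, 5, 5),   /* rn: insn[9:5]   */
               extract32(insn, 0, 5));  /* rt: insn[4:0]   */
        return 0;
    }

This prints sz=3 a=0 r=0 rs=1 rn=3 rt=2: a 64-bit LDADD with X1 as the operand
source, X3 as the base address and X2 as the destination.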

@@ -3241,113 +3241,32 @@ static bool trans_STR_v(DisasContext *s, arg_ldst *a)
     return true;
 }
 
-/* Atomic memory operations
- *
- *  31  30      27  26    24    22  21   16   15    12    10         5     0
- * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
- * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
- * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
- *
- * Rt: the result register
- * Rn: base address or SP
- * Rs: the source register for the operation
- * V: vector flag (always 0 as of v8.3)
- * A: acquire flag
- * R: release flag
- */
-static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
-                              int size, int rt, bool is_vector)
+static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
+                         int sign, bool invert)
 {
-    int rs = extract32(insn, 16, 5);
-    int rn = extract32(insn, 5, 5);
-    int o3_opc = extract32(insn, 12, 4);
-    bool r = extract32(insn, 22, 1);
-    bool a = extract32(insn, 23, 1);
-    TCGv_i64 tcg_rs, tcg_rt, clean_addr;
-    AtomicThreeOpFn *fn = NULL;
-    MemOp mop = size;
+    MemOp mop = a->sz | sign;
+    TCGv_i64 clean_addr, tcg_rs, tcg_rt;
 
-    if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
-        unallocated_encoding(s);
-        return;
-    }
-    switch (o3_opc) {
-    case 000: /* LDADD */
-        fn = tcg_gen_atomic_fetch_add_i64;
-        break;
-    case 001: /* LDCLR */
-        fn = tcg_gen_atomic_fetch_and_i64;
-        break;
-    case 002: /* LDEOR */
-        fn = tcg_gen_atomic_fetch_xor_i64;
-        break;
-    case 003: /* LDSET */
-        fn = tcg_gen_atomic_fetch_or_i64;
-        break;
-    case 004: /* LDSMAX */
-        fn = tcg_gen_atomic_fetch_smax_i64;
-        mop |= MO_SIGN;
-        break;
-    case 005: /* LDSMIN */
-        fn = tcg_gen_atomic_fetch_smin_i64;
-        mop |= MO_SIGN;
-        break;
-    case 006: /* LDUMAX */
-        fn = tcg_gen_atomic_fetch_umax_i64;
-        break;
-    case 007: /* LDUMIN */
-        fn = tcg_gen_atomic_fetch_umin_i64;
-        break;
-    case 010: /* SWP */
-        fn = tcg_gen_atomic_xchg_i64;
-        break;
-    case 014: /* LDAPR, LDAPRH, LDAPRB */
-        if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
-            rs != 31 || a != 1 || r != 0) {
-            unallocated_encoding(s);
-            return;
-        }
-        break;
-    default:
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (rn == 31) {
+    if (a->rn == 31) {
         gen_check_sp_alignment(s);
     }
-
-    mop = check_atomic_align(s, rn, mop);
-    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, mop);
-
-    if (o3_opc == 014) {
-        /*
-         * LDAPR* are a special case because they are a simple load, not a
-         * fetch-and-do-something op.
-         * The architectural consistency requirements here are weaker than
-         * full load-acquire (we only need "load-acquire processor consistent"),
-         * but we choose to implement them as full LDAQ.
-         */
-        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, false,
-                  true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
-        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
-        return;
-    }
-
-    tcg_rs = read_cpu_reg(s, rs, true);
-    tcg_rt = cpu_reg(s, rt);
-
-    if (o3_opc == 1) { /* LDCLR */
+    mop = check_atomic_align(s, a->rn, mop);
+    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
+                                a->rn != 31, mop);
+    tcg_rs = read_cpu_reg(s, a->rs, true);
+    tcg_rt = cpu_reg(s, a->rt);
+
+    if (invert) {
         tcg_gen_not_i64(tcg_rs, tcg_rs);
     }
-
-    /* The tcg atomic primitives are all full barriers.  Therefore we
+    /*
+     * The tcg atomic primitives are all full barriers.  Therefore we
      * can ignore the Acquire and Release bits of this instruction.
      */
     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
 
     if (mop & MO_SIGN) {
-        switch (size) {
+        switch (a->sz) {
         case MO_8:
             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
             break;

@@ -3363,6 +3282,46 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
             g_assert_not_reached();
         }
     }
+    return true;
+}
+
+TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
+TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
+TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
+TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
+TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
+TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
+TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
+TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
+TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
+
+static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
+{
+    bool iss_sf = ldst_iss_sf(a->sz, false, false);
+    TCGv_i64 clean_addr;
+    MemOp mop;
+
+    if (!dc_isar_feature(aa64_atomics, s) ||
+        !dc_isar_feature(aa64_rcpc_8_3, s)) {
+        return false;
+    }
+    if (a->rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    mop = check_atomic_align(s, a->rn, a->sz);
+    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
+                                a->rn != 31, mop);
+    /*
+     * LDAPR* are a special case because they are a simple load, not a
+     * fetch-and-do-something op.
+     * The architectural consistency requirements here are weaker than
+     * full load-acquire (we only need "load-acquire processor consistent"),
+     * but we choose to implement them as full LDAQ.
+     */
+    do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
+              true, a->rt, iss_sf, true);
+    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+    return true;
 }
 
 /*
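
Each TRANS_FEAT() line above replaces one arm of the old hand-written switch:
it expands to the trans_<INSN> callback that the decodetree-generated decoder
invokes, with the ISA feature gate folded in (the old code tested
dc_isar_feature(aa64_atomics, s) by hand). A rough sketch of the macro,
paraphrased from QEMU's translate headers rather than quoted verbatim:

    /*
     * Emit the trans_<NAME> hook the generated decoder calls; returning
     * false means "not handled", so the insn ends up treated as unallocated.
     */
    #define TRANS_FEAT(NAME, FEAT, FUNC, ...)                     \
        static bool trans_##NAME(DisasContext *s, arg_##NAME *a)  \
        { return dc_isar_feature(FEAT, s) && FUNC(__VA_ARGS__); }

So TRANS_FEAT(LDADD, ...) yields a trans_LDADD that runs
do_atomic_ld(s, a, tcg_gen_atomic_fetch_add_i64, 0, false) only when the LSE
atomics feature is present, mirroring the old unallocated_encoding() path.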

@@ -3529,8 +3488,6 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
     }
     switch (extract32(insn, 10, 2)) {
     case 0:
-        disas_ldst_atomic(s, insn, size, rt, is_vector);
-        return;
     case 2:
         break;
     default:
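
Note that LDCLR is the only conversion passing invert=true: architecturally it
is an atomic bit-clear (memory becomes old AND NOT Rs), but TCG offers only
fetch-and-AND, so do_atomic_ld() complements Rs first, exactly as the deleted
"o3_opc == 1" special case did. A self-contained C11 sketch of the same
semantics (illustrative only, not QEMU code):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        _Atomic uint64_t mem = 0xff;  /* the memory cell at [Rn] */
        uint64_t rs = 0x0f;           /* Rs: the bits to clear   */

        /* LDCLR Rs, Rt, [Rn]: Rt gets the old value, memory gets
         * old & ~Rs, expressed via the fetch-and-AND primitive. */
        uint64_t rt = atomic_fetch_and(&mem, ~rs);

        printf("rt=%#llx mem=%#llx\n",
               (unsigned long long)rt, (unsigned long long)mem);
        /* prints: rt=0xff mem=0xf0 */
        return 0;
    }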