From c1e4e87f5f2caef1a368a77e12a872ff8fb5a01f Mon Sep 17 00:00:00 2001
From: Matt Borgerson
Date: Sun, 24 Oct 2021 23:17:47 -0700
Subject: [PATCH] tcg: Cache invalidated TBs

When a TB is invalidated, keep it in a second hash table
(tb_ctx.inv_htable) so that it can be reused later. tb_gen_code() looks
in that table before allocating and translating a new TB; on a hit it
clears CF_INVALID, removes the TB from inv_htable and jumps to the
recycle_tb label, skipping tcg_tb_alloc() and the translation step.

A cached TB is only considered a match if, in addition to the usual
tb_lookup_cmp() checks, a hash of the guest code bytes it covers
(tb->ihash, recorded in translator_loop()) equals the hash of the bytes
currently found at tb->pc. The bytes are fetched with the new
cpu_ld_code() helper and hashed by tb_code_hash_func().

inv_htable is reset together with htable in do_tb_flush().

---
Review notes (ignored by git-am):

 - cpu_ld_code(): read addr + i for each byte. The loop used to re-read
   the byte at addr len times, so the code hash depended only on the
   first code byte and the size.
 - NOTE(review): tb_lookup_cmp() is shared by tb_htable_lookup() and
   inv_tb_htable_lookup(), so masking out CF_INVALID there also affects
   lookups in tb_ctx.htable. Confirm that this is intended.
 - NOTE(review): tb_code_hash_func() asserts size < 4096 while the
   comment on TranslationBlock.size allows size == TARGET_PAGE_SIZE.
   Confirm the bound for the targets this is built for.

 accel/tcg/cpu-exec.c      | 36 +++++++++++++++++++++++++++++++-----
 accel/tcg/cputlb.c        |  8 ++++++++
 accel/tcg/tb-context.h    |  1 +
 accel/tcg/tb-hash.h       | 10 ++++++++++
 accel/tcg/translate-all.c | 27 +++++++++++++++++++++++++++
 accel/tcg/translator.c    |  2 ++
 include/exec/cpu_ldst.h   |  1 +
 include/exec/exec-all.h   |  4 ++++
 8 files changed, 84 insertions(+), 5 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index e5c0ccd1a2..96ffbfeec1 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -491,7 +491,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
-        tb_cflags(tb) == desc->cflags) {
+        (tb_cflags(tb) & ~CF_INVALID) == desc->cflags) {
         /* check next page if needed */
         if (tb->page_addr[1] == -1) {
             return true;
@@ -509,9 +509,19 @@ static bool tb_lookup_cmp(const void *p, const void *d)
     return false;
 }
 
-TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-                                   target_ulong cs_base, uint32_t flags,
-                                   uint32_t cflags)
+static bool inv_tb_lookup_cmp(const void *p, const void *d)
+{
+    const TranslationBlock *tb = p;
+    const struct tb_desc *desc = d;
+
+    return tb_lookup_cmp(p, d) &&
+           tb->ihash == tb_code_hash_func(desc->env, tb->pc, tb->size);
+}
+
+static TranslationBlock *
+tb_htable_lookup_common(CPUState *cpu, target_ulong pc, target_ulong cs_base,
+                        uint32_t flags, uint32_t cflags, const struct qht *ht,
+                        qht_lookup_func_t func)
 {
     tb_page_addr_t phys_pc;
     struct tb_desc desc;
@@ -529,7 +539,23 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
     }
     desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
     h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
-    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
+    return qht_lookup_custom(ht, &desc, h, func);
+}
+
+TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+                                   target_ulong cs_base, uint32_t flags,
+                                   uint32_t cflags)
+{
+    return tb_htable_lookup_common(cpu, pc, cs_base, flags, cflags,
+                                   &tb_ctx.htable, tb_lookup_cmp);
+}
+
+TranslationBlock *inv_tb_htable_lookup(CPUState *cpu, target_ulong pc,
+                                       target_ulong cs_base, uint32_t flags,
+                                       uint32_t cflags)
+{
+    return tb_htable_lookup_common(cpu, pc, cs_base, flags, cflags,
+                                   &tb_ctx.inv_htable, inv_tb_lookup_cmp);
 }
 
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index d6025e6d1b..13cf307906 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2768,6 +2768,14 @@ uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
     return full_ldub_code(env, addr, oi, 0);
 }
 
+void cpu_ld_code(CPUArchState *env, abi_ptr addr, size_t len, uint8_t *out)
+{
+    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
+    for (size_t i = 0; i < len; i++) {
+        out[i] = full_ldub_code(env, addr + i, oi, 0);
+    }
+}
+
 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr)
 {
diff --git a/accel/tcg/tb-context.h b/accel/tcg/tb-context.h
index cac62d9749..163ed8215b 100644
--- a/accel/tcg/tb-context.h
+++ b/accel/tcg/tb-context.h
@@ -31,6 +31,7 @@ typedef struct TBContext TBContext;
 struct TBContext {
 
     struct qht htable;
+    struct qht inv_htable;
 
     /* statistics */
     unsigned tb_flush_count;
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 0a273d9605..da7a230a1b 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -23,6 +23,7 @@
 #include "exec/cpu-defs.h"
 #include "exec/exec-all.h"
 #include "qemu/xxhash.h"
+#include "qemu/fast-hash.h"
 
 #ifdef CONFIG_SOFTMMU
 
@@ -66,4 +67,13 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
     return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
+static inline
+uint64_t tb_code_hash_func(CPUArchState *env, target_ulong pc, size_t size)
+{
+    assert(size < 4096);
+    uint8_t code[size];
+    cpu_ld_code(env, pc, size, code); /* TODO: speed, error handling */
+    return fast_hash(code, size);
+}
+
 #endif
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 92694cf9c6..ef6b3d019e 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -899,11 +899,18 @@ static bool tb_cmp(const void *ap, const void *bp)
         a->page_addr[1] == b->page_addr[1];
 }
 
+static bool inv_tb_cmp(const void *ap, const void *bp)
+{
+    const TranslationBlock *a = ap, *b = bp;
+    return tb_cmp(ap, bp) && a->ihash == b->ihash;
+}
+
 void tb_htable_init(void)
 {
     unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+    qht_init(&tb_ctx.inv_htable, inv_tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
 }
 
 /* call with @p->lock held */
@@ -989,6 +996,8 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
     }
 
     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
+    qht_reset_size(&tb_ctx.inv_htable, CODE_GEN_HTABLE_SIZE);
+
     page_flush_tb();
 
     tcg_region_reset_all();
@@ -1176,6 +1185,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     uint32_t h;
     tb_page_addr_t phys_pc;
     uint32_t orig_cflags = tb_cflags(tb);
+    void *existing = NULL;
 
     assert_memory_lock();
 
@@ -1192,6 +1202,9 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
         return;
     }
 
+    qht_insert(&tb_ctx.inv_htable, tb, h, &existing);
+    g_assert(existing == NULL);
+
     /* remove the TB from the page list */
     if (rm_from_page_list) {
         p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
@@ -1432,6 +1445,18 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     }
     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
 
+    tb = inv_tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
+    if (tb) {
+        qemu_spin_lock(&tb->jmp_lock);
+        qatomic_set(&tb->cflags, tb->cflags & ~CF_INVALID);
+        qemu_spin_unlock(&tb->jmp_lock);
+        uint32_t h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb),
+                                  tb->trace_vcpu_dstate);
+        bool removed = qht_remove(&tb_ctx.inv_htable, tb, h);
+        g_assert(removed);
+        goto recycle_tb;
+    }
+
  buffer_overflow:
     tb = tcg_tb_alloc(tcg_ctx);
     if (unlikely(!tb)) {
@@ -1622,6 +1647,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
     /* init jump list */
     qemu_spin_init(&tb->jmp_lock);
+
+recycle_tb:
     tb->jmp_list_head = (uintptr_t)NULL;
     tb->jmp_list_next[0] = (uintptr_t)NULL;
     tb->jmp_list_next[1] = (uintptr_t)NULL;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index c53a7f8e44..7b054719eb 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -17,6 +17,7 @@
 #include "exec/translator.h"
 #include "exec/plugin-gen.h"
 #include "sysemu/replay.h"
+#include "accel/tcg/tb-hash.h"
 
 /* Pairs with tcg_clear_temp_count.
    To be called by #TranslatorOps.{translate_insn,tb_stop} if
@@ -125,6 +126,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
     /* The disas_log hook may use these values rather than recompute.  */
     tb->size = db->pc_next - db->pc_first;
     tb->icount = db->num_insns;
+    tb->ihash = tb_code_hash_func(cpu->env_ptr, tb->pc, tb->size);
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index ce6ce82618..44a4e070ec 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -428,6 +428,7 @@ uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr);
+void cpu_ld_code(CPUArchState *env, abi_ptr addr, size_t len, uint8_t *out);
 
 static inline int cpu_ldsb_code(CPUArchState *env, abi_ptr addr)
 {
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 5d1b6d80fb..1c2aff028a 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -516,6 +516,7 @@ struct TranslationBlock {
     /* size of target code for this block (1 <= size <= TARGET_PAGE_SIZE) */
     uint16_t size;
     uint16_t icount;
+    uint64_t ihash;
 
     struct tb_tc tc;
 
@@ -582,6 +583,9 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags,
                                    uint32_t cflags);
+TranslationBlock *inv_tb_htable_lookup(CPUState *cpu, target_ulong pc,
+                                       target_ulong cs_base, uint32_t flags,
+                                       uint32_t cflags);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
 
 /* GETPC is the true target of the return instruction that we'll execute.  */