accel/tcg: Add fast path for translator_ld*

Cache the translation from guest to host address, so we may
use direct loads when we hit on the primary translation page.

Look up the second translation page only once, during translation.
This obviates another lookup of the second page within tb_gen_code
after translation.

Fixes a bug: plugin_insn_append should be passed the bytes
in the original memory order, not bswapped by pieces.

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2022-08-10 21:39:29 -07:00
parent 306c872103
commit 50627f1b7b
3 changed files with 141 additions and 71 deletions

View File

@ -1385,8 +1385,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
{ {
CPUArchState *env = cpu->env_ptr; CPUArchState *env = cpu->env_ptr;
TranslationBlock *tb, *existing_tb; TranslationBlock *tb, *existing_tb;
tb_page_addr_t phys_pc, phys_page2; tb_page_addr_t phys_pc;
target_ulong virt_page2;
tcg_insn_unit *gen_code_buf; tcg_insn_unit *gen_code_buf;
int gen_code_size, search_size, max_insns; int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER #ifdef CONFIG_PROFILER
@ -1429,6 +1428,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb->flags = flags; tb->flags = flags;
tb->cflags = cflags; tb->cflags = cflags;
tb->trace_vcpu_dstate = *cpu->trace_dstate; tb->trace_vcpu_dstate = *cpu->trace_dstate;
tb->page_addr[0] = phys_pc;
tb->page_addr[1] = -1;
tcg_ctx->tb_cflags = cflags; tcg_ctx->tb_cflags = cflags;
tb_overflow: tb_overflow:
@ -1622,13 +1623,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
} }
/* /*
* If the TB is not associated with a physical RAM page then * If the TB is not associated with a physical RAM page then it must be
* it must be a temporary one-insn TB, and we have nothing to do * a temporary one-insn TB, and we have nothing left to do. Return early
* except fill in the page_addr[] fields. Return early before * before attempting to link to other TBs or add to the lookup table.
* attempting to link to other TBs or add to the lookup table.
*/ */
if (phys_pc == -1) { if (tb->page_addr[0] == -1) {
tb->page_addr[0] = tb->page_addr[1] = -1;
return tb; return tb;
} }
@ -1639,17 +1638,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
*/ */
tcg_tb_insert(tb); tcg_tb_insert(tb);
/* check next page if needed */
virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
phys_page2 = -1;
if ((pc & TARGET_PAGE_MASK) != virt_page2) {
phys_page2 = get_page_addr_code(env, virt_page2);
}
/* /*
* No explicit memory barrier is required -- tb_link_page() makes the * No explicit memory barrier is required -- tb_link_page() makes the
* TB visible in a consistent state. * TB visible in a consistent state.
*/ */
existing_tb = tb_link_page(tb, phys_pc, phys_page2); existing_tb = tb_link_page(tb, tb->page_addr[0], tb->page_addr[1]);
/* if the TB already exists, discard what we just translated */ /* if the TB already exists, discard what we just translated */
if (unlikely(existing_tb != tb)) { if (unlikely(existing_tb != tb)) {
uintptr_t orig_aligned = (uintptr_t)gen_code_buf; uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

View File

@ -42,15 +42,6 @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0; return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
} }
static inline void translator_page_protect(DisasContextBase *dcbase,
target_ulong pc)
{
#ifdef CONFIG_USER_ONLY
dcbase->page_protect_end = pc | ~TARGET_PAGE_MASK;
page_protect(pc);
#endif
}
void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns, void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
target_ulong pc, void *host_pc, target_ulong pc, void *host_pc,
const TranslatorOps *ops, DisasContextBase *db) const TranslatorOps *ops, DisasContextBase *db)
@ -66,7 +57,12 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
db->num_insns = 0; db->num_insns = 0;
db->max_insns = max_insns; db->max_insns = max_insns;
db->singlestep_enabled = cflags & CF_SINGLE_STEP; db->singlestep_enabled = cflags & CF_SINGLE_STEP;
translator_page_protect(db, db->pc_next); db->host_addr[0] = host_pc;
db->host_addr[1] = NULL;
#ifdef CONFIG_USER_ONLY
page_protect(pc);
#endif
ops->init_disas_context(db, cpu); ops->init_disas_context(db, cpu);
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
@ -151,31 +147,103 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
#endif #endif
} }
static inline void translator_maybe_page_protect(DisasContextBase *dcbase, static void *translator_access(CPUArchState *env, DisasContextBase *db,
target_ulong pc, size_t len) target_ulong pc, size_t len)
{ {
#ifdef CONFIG_USER_ONLY void *host;
target_ulong end = pc + len - 1; target_ulong base, end;
TranslationBlock *tb;
if (end > dcbase->page_protect_end) { tb = db->tb;
translator_page_protect(dcbase, end);
/* Use slow path if first page is MMIO. */
if (unlikely(tb->page_addr[0] == -1)) {
return NULL;
} }
end = pc + len - 1;
if (likely(is_same_page(db, end))) {
host = db->host_addr[0];
base = db->pc_first;
} else {
host = db->host_addr[1];
base = TARGET_PAGE_ALIGN(db->pc_first);
if (host == NULL) {
tb->page_addr[1] =
get_page_addr_code_hostp(env, base, &db->host_addr[1]);
#ifdef CONFIG_USER_ONLY
page_protect(end);
#endif #endif
/* We cannot handle MMIO as second page. */
assert(tb->page_addr[1] != -1);
host = db->host_addr[1];
}
/* Use slow path when crossing pages. */
if (is_same_page(db, pc)) {
return NULL;
}
}
tcg_debug_assert(pc >= base);
return host + (pc - base);
} }
#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \ uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \ {
abi_ptr pc, bool do_swap) \ uint8_t ret;
{ \ void *p = translator_access(env, db, pc, sizeof(ret));
translator_maybe_page_protect(dcbase, pc, sizeof(type)); \
type ret = load_fn(env, pc); \ if (p) {
if (do_swap) { \ plugin_insn_append(pc, p, sizeof(ret));
ret = swap_fn(ret); \ return ldub_p(p);
} \
plugin_insn_append(pc, &ret, sizeof(ret)); \
return ret; \
} }
ret = cpu_ldub_code(env, pc);
plugin_insn_append(pc, &ret, sizeof(ret));
return ret;
}
FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD) uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
{
uint16_t ret, plug;
void *p = translator_access(env, db, pc, sizeof(ret));
#undef GEN_TRANSLATOR_LD if (p) {
plugin_insn_append(pc, p, sizeof(ret));
return lduw_p(p);
}
ret = cpu_lduw_code(env, pc);
plug = tswap16(ret);
plugin_insn_append(pc, &plug, sizeof(ret));
return ret;
}
uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
{
uint32_t ret, plug;
void *p = translator_access(env, db, pc, sizeof(ret));
if (p) {
plugin_insn_append(pc, p, sizeof(ret));
return ldl_p(p);
}
ret = cpu_ldl_code(env, pc);
plug = tswap32(ret);
plugin_insn_append(pc, &plug, sizeof(ret));
return ret;
}
uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
{
uint64_t ret, plug;
void *p = translator_access(env, db, pc, sizeof(ret));
if (p) {
plugin_insn_append(pc, p, sizeof(ret));
return ldq_p(p);
}
ret = cpu_ldq_code(env, pc);
plug = tswap64(ret);
plugin_insn_append(pc, &plug, sizeof(ret));
return ret;
}

View File

@ -81,24 +81,14 @@ typedef enum DisasJumpType {
* Architecture-agnostic disassembly context. * Architecture-agnostic disassembly context.
*/ */
typedef struct DisasContextBase { typedef struct DisasContextBase {
const TranslationBlock *tb; TranslationBlock *tb;
target_ulong pc_first; target_ulong pc_first;
target_ulong pc_next; target_ulong pc_next;
DisasJumpType is_jmp; DisasJumpType is_jmp;
int num_insns; int num_insns;
int max_insns; int max_insns;
bool singlestep_enabled; bool singlestep_enabled;
#ifdef CONFIG_USER_ONLY void *host_addr[2];
/*
* Guest address of the last byte of the last protected page.
*
* Pages containing the translated instructions are made non-writable in
* order to achieve consistency in case another thread is modifying the
* code while translate_insn() fetches the instruction bytes piecemeal.
* Such writer threads are blocked on mmap_lock() in page_unprotect().
*/
target_ulong page_protect_end;
#endif
} DisasContextBase; } DisasContextBase;
/** /**
@ -183,24 +173,43 @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
* the relevant information at translation time. * the relevant information at translation time.
*/ */
#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \ uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \ uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
abi_ptr pc, bool do_swap); \ uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
static inline type fullname(CPUArchState *env, \ uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
DisasContextBase *dcbase, abi_ptr pc) \
{ \ static inline uint16_t
return fullname ## _swap(env, dcbase, pc, false); \ translator_lduw_swap(CPUArchState *env, DisasContextBase *db,
abi_ptr pc, bool do_swap)
{
uint16_t ret = translator_lduw(env, db, pc);
if (do_swap) {
ret = bswap16(ret);
} }
return ret;
}
#define FOR_EACH_TRANSLATOR_LD(F) \ static inline uint32_t
F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */) \ translator_ldl_swap(CPUArchState *env, DisasContextBase *db,
F(translator_lduw, uint16_t, cpu_lduw_code, bswap16) \ abi_ptr pc, bool do_swap)
F(translator_ldl, uint32_t, cpu_ldl_code, bswap32) \ {
F(translator_ldq, uint64_t, cpu_ldq_code, bswap64) uint32_t ret = translator_ldl(env, db, pc);
if (do_swap) {
ret = bswap32(ret);
}
return ret;
}
FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD) static inline uint64_t
translator_ldq_swap(CPUArchState *env, DisasContextBase *db,
#undef GEN_TRANSLATOR_LD abi_ptr pc, bool do_swap)
{
uint64_t ret = translator_ldq(env, db, pc);
if (do_swap) {
ret = bswap64(ret);
}
return ret;
}
/* /*
* Return whether addr is on the same page as where disassembly started. * Return whether addr is on the same page as where disassembly started.