From fbd3c4cff641cae082edb765017cbb699efa6712 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 10 Aug 2022 17:22:04 -0700 Subject: [PATCH 01/20] linux-user/arm: Mark the commpage executable We're about to start validating PAGE_EXEC, which means that we've got to mark the commpage executable. We had been placing the commpage outside of reserved_va, which was incorrect and lead to an abort. Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- linux-user/arm/target_cpu.h | 4 ++-- linux-user/elfload.c | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/linux-user/arm/target_cpu.h b/linux-user/arm/target_cpu.h index 709d19bc9e..89ba274cfc 100644 --- a/linux-user/arm/target_cpu.h +++ b/linux-user/arm/target_cpu.h @@ -34,9 +34,9 @@ static inline unsigned long arm_max_reserved_va(CPUState *cs) } else { /* * We need to be able to map the commpage. - * See validate_guest_space in linux-user/elfload.c. + * See init_guest_commpage in linux-user/elfload.c. */ - return 0xffff0000ul; + return 0xfffffffful; } } #define MAX_RESERVED_VA arm_max_reserved_va diff --git a/linux-user/elfload.c b/linux-user/elfload.c index ce902dbd56..3e3dc02499 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -398,7 +398,8 @@ enum { static bool init_guest_commpage(void) { - void *want = g2h_untagged(HI_COMMPAGE & -qemu_host_page_size); + abi_ptr commpage = HI_COMMPAGE & -qemu_host_page_size; + void *want = g2h_untagged(commpage); void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); @@ -417,6 +418,9 @@ static bool init_guest_commpage(void) perror("Protecting guest commpage"); exit(EXIT_FAILURE); } + + page_set_flags(commpage, commpage + qemu_host_page_size, + PAGE_READ | PAGE_EXEC | PAGE_VALID); return true; } From eee816c0b972bd085e62fcd7d2de7a34dbbd155b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 10 Aug 2022 18:44:48 -0700 Subject: [PATCH 02/20] linux-user/hppa: Allocate page zero as a commpage We're about to start validating PAGE_EXEC, which means that we've got to mark page zero executable. We had been special casing this entirely within translate. Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- linux-user/elfload.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 3e3dc02499..29d910c4cc 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1646,6 +1646,34 @@ static inline void init_thread(struct target_pt_regs *regs, regs->gr[31] = infop->entry; } +#define LO_COMMPAGE 0 + +static bool init_guest_commpage(void) +{ + void *want = g2h_untagged(LO_COMMPAGE); + void *addr = mmap(want, qemu_host_page_size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + + if (addr == MAP_FAILED) { + perror("Allocating guest commpage"); + exit(EXIT_FAILURE); + } + if (addr != want) { + return false; + } + + /* + * On Linux, page zero is normally marked execute only + gateway. + * Normal read or write is supposed to fail (thus PROT_NONE above), + * but specific offsets have kernel code mapped to raise permissions + * and implement syscalls. Here, simply mark the page executable. + * Special case the entry points during translation (see do_page_zero). + */ + page_set_flags(LO_COMMPAGE, LO_COMMPAGE + TARGET_PAGE_SIZE, + PAGE_EXEC | PAGE_VALID); + return true; +} + #endif /* TARGET_HPPA */ #ifdef TARGET_XTENSA @@ -2326,12 +2354,12 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, } #if defined(HI_COMMPAGE) -#define LO_COMMPAGE 0 +#define LO_COMMPAGE -1 #elif defined(LO_COMMPAGE) #define HI_COMMPAGE 0 #else #define HI_COMMPAGE 0 -#define LO_COMMPAGE 0 +#define LO_COMMPAGE -1 #define init_guest_commpage() true #endif @@ -2555,7 +2583,7 @@ static void pgb_static(const char *image_name, abi_ulong orig_loaddr, } else { offset = -(HI_COMMPAGE & -align); } - } else if (LO_COMMPAGE != 0) { + } else if (LO_COMMPAGE != -1) { loaddr = MIN(loaddr, LO_COMMPAGE & -align); } From d461b73ec007ccbe861db91bbcd792c92e46fcad Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 11 Aug 2022 11:18:29 -0700 Subject: [PATCH 03/20] linux-user/x86_64: Allocate vsyscall page as a commpage We're about to start validating PAGE_EXEC, which means that we've got to mark the vsyscall page executable. We had been special casing this entirely within translate. Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- linux-user/elfload.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 29d910c4cc..82fecf9e5a 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -195,6 +195,27 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en (*regs)[26] = tswapreg(env->segs[R_GS].selector & 0xffff); } +#if ULONG_MAX > UINT32_MAX +#define INIT_GUEST_COMMPAGE +static bool init_guest_commpage(void) +{ + /* + * The vsyscall page is at a high negative address aka kernel space, + * which means that we cannot actually allocate it with target_mmap. + * We still should be able to use page_set_flags, unless the user + * has specified -R reserved_va, which would trigger an assert(). + */ + if (reserved_va != 0 && + TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE >= reserved_va) { + error_report("Cannot allocate vsyscall page"); + exit(EXIT_FAILURE); + } + page_set_flags(TARGET_VSYSCALL_PAGE, + TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE, + PAGE_EXEC | PAGE_VALID); + return true; +} +#endif #else #define ELF_START_MMAP 0x80000000 @@ -2360,8 +2381,10 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, #else #define HI_COMMPAGE 0 #define LO_COMMPAGE -1 +#ifndef INIT_GUEST_COMMPAGE #define init_guest_commpage() true #endif +#endif static void pgb_fail_in_use(const char *image_name) { From 872f3d046f2381e3f416519e82df96bd60818311 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 10 Aug 2022 23:00:52 -0700 Subject: [PATCH 04/20] linux-user: Honor PT_GNU_STACK Map the stack executable if required by default or on demand. Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- include/elf.h | 1 + linux-user/elfload.c | 19 ++++++++++++++++++- linux-user/qemu.h | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/elf.h b/include/elf.h index 3a4bcb646a..3d6b9062c0 100644 --- a/include/elf.h +++ b/include/elf.h @@ -31,6 +31,7 @@ typedef int64_t Elf64_Sxword; #define PT_LOPROC 0x70000000 #define PT_HIPROC 0x7fffffff +#define PT_GNU_STACK (PT_LOOS + 0x474e551) #define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) #define PT_MIPS_REGINFO 0x70000000 diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 82fecf9e5a..ba5c4c02e5 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -232,6 +232,7 @@ static bool init_guest_commpage(void) #define ELF_ARCH EM_386 #define ELF_PLATFORM get_elf_platform() +#define EXSTACK_DEFAULT true static const char *get_elf_platform(void) { @@ -308,6 +309,7 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en #define ELF_ARCH EM_ARM #define ELF_CLASS ELFCLASS32 +#define EXSTACK_DEFAULT true static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) @@ -776,6 +778,7 @@ static inline void init_thread(struct target_pt_regs *regs, #else #define ELF_CLASS ELFCLASS32 +#define EXSTACK_DEFAULT true #endif @@ -973,6 +976,7 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en #define ELF_CLASS ELFCLASS64 #define ELF_ARCH EM_LOONGARCH +#define EXSTACK_DEFAULT true #define elf_check_arch(x) ((x) == EM_LOONGARCH) @@ -1068,6 +1072,7 @@ static uint32_t get_elf_hwcap(void) #define ELF_CLASS ELFCLASS32 #endif #define ELF_ARCH EM_MIPS +#define EXSTACK_DEFAULT true #ifdef TARGET_ABI_MIPSN32 #define elf_check_abi(x) ((x) & EF_MIPS_ABI2) @@ -1806,6 +1811,10 @@ static inline void init_thread(struct target_pt_regs *regs, #define bswaptls(ptr) bswap32s(ptr) #endif +#ifndef EXSTACK_DEFAULT +#define EXSTACK_DEFAULT false +#endif + #include "elf.h" /* We must delay the following stanzas until after "elf.h". */ @@ -2081,6 +2090,7 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm, struct image_info *info) { abi_ulong size, error, guard; + int prot; size = guest_stack_size; if (size < STACK_LOWER_LIMIT) { @@ -2091,7 +2101,11 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm, guard = qemu_real_host_page_size(); } - error = target_mmap(0, size + guard, PROT_READ | PROT_WRITE, + prot = PROT_READ | PROT_WRITE; + if (info->exec_stack) { + prot |= PROT_EXEC; + } + error = target_mmap(0, size + guard, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (error == -1) { perror("mmap stack"); @@ -2921,6 +2935,7 @@ static void load_elf_image(const char *image_name, int image_fd, */ loaddr = -1, hiaddr = 0; info->alignment = 0; + info->exec_stack = EXSTACK_DEFAULT; for (i = 0; i < ehdr->e_phnum; ++i) { struct elf_phdr *eppnt = phdr + i; if (eppnt->p_type == PT_LOAD) { @@ -2963,6 +2978,8 @@ static void load_elf_image(const char *image_name, int image_fd, if (!parse_elf_properties(image_fd, info, eppnt, bprm_buf, &err)) { goto exit_errmsg; } + } else if (eppnt->p_type == PT_GNU_STACK) { + info->exec_stack = eppnt->p_flags & PF_X; } } diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 7d90de1b15..e2e93fbd1d 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -48,6 +48,7 @@ struct image_info { uint32_t elf_flags; int personality; abi_ulong alignment; + bool exec_stack; /* Generic semihosting knows about these pointers. */ abi_ulong arg_strings; /* strings for argv */ From aa98e2d885ca9de66ddd56d88b19b0e162cc90d7 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 17 Aug 2022 17:05:03 +0200 Subject: [PATCH 05/20] linux-user: Clear translations on mprotect() Currently it's possible to execute pages that do not have PAGE_EXEC if there is an existing translation block. Fix by invalidating TBs that touch the affected pages. Signed-off-by: Ilya Leoshkevich Message-Id: <20220817150506.592862-2-iii@linux.ibm.com> Signed-off-by: Richard Henderson --- linux-user/mmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 048c4135af..6a828e8418 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -177,9 +177,11 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) goto error; } } + page_set_flags(start, start + len, page_flags); - mmap_unlock(); - return 0; + tb_invalidate_phys_range(start, start + len); + ret = 0; + error: mmap_unlock(); return ret; From d64655c2c35d967e9c496937885c511240204e76 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 11 Aug 2022 10:58:36 -0700 Subject: [PATCH 06/20] tests/tcg/i386: Move smc_code2 to an executable section We're about to start validating PAGE_EXEC, which means that we've got to put this code into a section that is both writable and executable. Note that this test did not run on hardware beforehand either. Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- tests/tcg/i386/test-i386.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tcg/i386/test-i386.c b/tests/tcg/i386/test-i386.c index ac8d5a3c1f..e6b308a2c0 100644 --- a/tests/tcg/i386/test-i386.c +++ b/tests/tcg/i386/test-i386.c @@ -1998,7 +1998,7 @@ uint8_t code[] = { 0xc3, /* ret */ }; -asm(".section \".data\"\n" +asm(".section \".data_x\",\"awx\"\n" "smc_code2:\n" "movl 4(%esp), %eax\n" "movl %eax, smc_patch_addr2 + 1\n" From f3b2b81b6fdaacfa8a769ad5834e6f6f129e2119 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 11 Aug 2022 11:55:31 +0200 Subject: [PATCH 07/20] accel/tcg: Introduce is_same_page() Introduce a function that checks whether a given address is on the same page as where disassembly started. Having it improves readability of the following patches. Reviewed-by: Alistair Francis Signed-off-by: Ilya Leoshkevich Message-Id: <20220811095534.241224-3-iii@linux.ibm.com> Reviewed-by: Richard Henderson [rth: Make the DisasContextBase parameter const.] Signed-off-by: Richard Henderson --- include/exec/translator.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/exec/translator.h b/include/exec/translator.h index 7db6845535..0d0bf3a31e 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -187,4 +187,14 @@ FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD) #undef GEN_TRANSLATOR_LD +/* + * Return whether addr is on the same page as where disassembly started. + * Translators can use this to enforce the rule that only single-insn + * translation blocks are allowed to cross page boundaries. + */ +static inline bool is_same_page(const DisasContextBase *db, target_ulong addr) +{ + return ((addr ^ db->pc_first) & TARGET_PAGE_MASK) == 0; +} + #endif /* EXEC__TRANSLATOR_H */ From cdf7130851318004e6512dbfdb73156fe59c7a59 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 10 Aug 2022 13:52:50 -0700 Subject: [PATCH 08/20] accel/tcg: Properly implement get_page_addr_code for user-only The current implementation is a no-op, simply returning addr. This is incorrect, because we ought to be checking the page permissions for execution. Make get_page_addr_code inline for both implementations. Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Acked-by: Alistair Francis Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 5 --- accel/tcg/user-exec.c | 14 +++++++ include/exec/exec-all.h | 85 ++++++++++++++--------------------------- 3 files changed, 42 insertions(+), 62 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index a46f3a654d..43bd65c973 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1544,11 +1544,6 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, return qemu_ram_addr_from_host_nofail(p); } -tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) -{ - return get_page_addr_code_hostp(env, addr, NULL); -} - static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) { diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 20ada5472b..2bc4394b80 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -199,6 +199,20 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, return size ? g2h(env_cpu(env), addr) : NULL; } +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, + void **hostp) +{ + int flags; + + flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0); + g_assert(flags == 0); + + if (hostp) { + *hostp = g2h_untagged(addr); + } + return addr; +} + /* The softmmu versions of these helpers are in cputlb.c. */ /* diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 311e5fb422..0475ec6007 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -598,43 +598,44 @@ struct MemoryRegionSection *iotlb_to_section(CPUState *cpu, hwaddr index, MemTxAttrs attrs); #endif -#if defined(CONFIG_USER_ONLY) -void mmap_lock(void); -void mmap_unlock(void); -bool have_mmap_lock(void); - /** - * get_page_addr_code() - user-mode version + * get_page_addr_code_hostp() * @env: CPUArchState * @addr: guest virtual address of guest code * - * Returns @addr. + * See get_page_addr_code() (full-system version) for documentation on the + * return value. + * + * Sets *@hostp (when @hostp is non-NULL) as follows. + * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp + * to the host address where @addr's content is kept. + * + * Note: this function can trigger an exception. + */ +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, + void **hostp); + +/** + * get_page_addr_code() + * @env: CPUArchState + * @addr: guest virtual address of guest code + * + * If we cannot translate and execute from the entire RAM page, or if + * the region is not backed by RAM, returns -1. Otherwise, returns the + * ram_addr_t corresponding to the guest code at @addr. + * + * Note: this function can trigger an exception. */ static inline tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) { - return addr; + return get_page_addr_code_hostp(env, addr, NULL); } -/** - * get_page_addr_code_hostp() - user-mode version - * @env: CPUArchState - * @addr: guest virtual address of guest code - * - * Returns @addr. - * - * If @hostp is non-NULL, sets *@hostp to the host address where @addr's content - * is kept. - */ -static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, - target_ulong addr, - void **hostp) -{ - if (hostp) { - *hostp = g2h_untagged(addr); - } - return addr; -} +#if defined(CONFIG_USER_ONLY) +void mmap_lock(void); +void mmap_unlock(void); +bool have_mmap_lock(void); /** * adjust_signal_pc: @@ -691,36 +692,6 @@ G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, static inline void mmap_lock(void) {} static inline void mmap_unlock(void) {} -/** - * get_page_addr_code() - full-system version - * @env: CPUArchState - * @addr: guest virtual address of guest code - * - * If we cannot translate and execute from the entire RAM page, or if - * the region is not backed by RAM, returns -1. Otherwise, returns the - * ram_addr_t corresponding to the guest code at @addr. - * - * Note: this function can trigger an exception. - */ -tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr); - -/** - * get_page_addr_code_hostp() - full-system version - * @env: CPUArchState - * @addr: guest virtual address of guest code - * - * See get_page_addr_code() (full-system version) for documentation on the - * return value. - * - * Sets *@hostp (when @hostp is non-NULL) as follows. - * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp - * to the host address where @addr's content is kept. - * - * Note: this function can trigger an exception. - */ -tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, - void **hostp); - void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length); void tlb_set_dirty(CPUState *cpu, target_ulong vaddr); From 297368c74d077b24f0bb5c4f1ce541cc23d36815 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 10 Aug 2022 17:14:26 -0700 Subject: [PATCH 09/20] accel/tcg: Unlock mmap_lock after longjmp The mmap_lock is held around tb_gen_code. While the comment is correct that the lock is dropped when tb_gen_code runs out of memory, the lock is *not* dropped when an exception is raised reading code for translation. Acked-by: Alistair Francis Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 12 ++++++------ accel/tcg/user-exec.c | 3 --- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index a565a3f8ec..d18081ca6f 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -462,13 +462,11 @@ void cpu_exec_step_atomic(CPUState *cpu) cpu_tb_exec(cpu, tb, &tb_exit); cpu_exec_exit(cpu); } else { - /* - * The mmap_lock is dropped by tb_gen_code if it runs out of - * memory. - */ #ifndef CONFIG_SOFTMMU clear_helper_retaddr(); - tcg_debug_assert(!have_mmap_lock()); + if (have_mmap_lock()) { + mmap_unlock(); + } #endif if (qemu_mutex_iothread_locked()) { qemu_mutex_unlock_iothread(); @@ -936,7 +934,9 @@ int cpu_exec(CPUState *cpu) #ifndef CONFIG_SOFTMMU clear_helper_retaddr(); - tcg_debug_assert(!have_mmap_lock()); + if (have_mmap_lock()) { + mmap_unlock(); + } #endif if (qemu_mutex_iothread_locked()) { qemu_mutex_unlock_iothread(); diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 2bc4394b80..521aa8b61e 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -80,10 +80,7 @@ MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write) * (and if the translator doesn't handle page boundaries correctly * there's little we can do about that here). Therefore, do not * trigger the unwinder. - * - * Like tb_gen_code, release the memory lock before cpu_loop_exit. */ - mmap_unlock(); *pc = 0; return MMU_INST_FETCH; } From 0c90ba16e3b51381dc82b0f54532c6609261aea0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 16 Aug 2022 13:53:18 -0500 Subject: [PATCH 10/20] accel/tcg: Make tb_htable_lookup static The function is not used outside of cpu-exec.c. Move it and its subroutines up in the file, before the first use. Reviewed-by: Alistair Francis Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 122 ++++++++++++++++++++-------------------- include/exec/exec-all.h | 3 - 2 files changed, 61 insertions(+), 64 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index d18081ca6f..7887af6f45 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -170,6 +170,67 @@ uint32_t curr_cflags(CPUState *cpu) return cflags; } +struct tb_desc { + target_ulong pc; + target_ulong cs_base; + CPUArchState *env; + tb_page_addr_t phys_page1; + uint32_t flags; + uint32_t cflags; + uint32_t trace_vcpu_dstate; +}; + +static bool tb_lookup_cmp(const void *p, const void *d) +{ + const TranslationBlock *tb = p; + const struct tb_desc *desc = d; + + if (tb->pc == desc->pc && + tb->page_addr[0] == desc->phys_page1 && + tb->cs_base == desc->cs_base && + tb->flags == desc->flags && + tb->trace_vcpu_dstate == desc->trace_vcpu_dstate && + tb_cflags(tb) == desc->cflags) { + /* check next page if needed */ + if (tb->page_addr[1] == -1) { + return true; + } else { + tb_page_addr_t phys_page2; + target_ulong virt_page2; + + virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; + phys_page2 = get_page_addr_code(desc->env, virt_page2); + if (tb->page_addr[1] == phys_page2) { + return true; + } + } + } + return false; +} + +static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, + target_ulong cs_base, uint32_t flags, + uint32_t cflags) +{ + tb_page_addr_t phys_pc; + struct tb_desc desc; + uint32_t h; + + desc.env = cpu->env_ptr; + desc.cs_base = cs_base; + desc.flags = flags; + desc.cflags = cflags; + desc.trace_vcpu_dstate = *cpu->trace_dstate; + desc.pc = pc; + phys_pc = get_page_addr_code(desc.env, pc); + if (phys_pc == -1) { + return NULL; + } + desc.phys_page1 = phys_pc & TARGET_PAGE_MASK; + h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate); + return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp); +} + /* Might cause an exception, so have a longjmp destination ready */ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, target_ulong cs_base, @@ -485,67 +546,6 @@ void cpu_exec_step_atomic(CPUState *cpu) end_exclusive(); } -struct tb_desc { - target_ulong pc; - target_ulong cs_base; - CPUArchState *env; - tb_page_addr_t phys_page1; - uint32_t flags; - uint32_t cflags; - uint32_t trace_vcpu_dstate; -}; - -static bool tb_lookup_cmp(const void *p, const void *d) -{ - const TranslationBlock *tb = p; - const struct tb_desc *desc = d; - - if (tb->pc == desc->pc && - tb->page_addr[0] == desc->phys_page1 && - tb->cs_base == desc->cs_base && - tb->flags == desc->flags && - tb->trace_vcpu_dstate == desc->trace_vcpu_dstate && - tb_cflags(tb) == desc->cflags) { - /* check next page if needed */ - if (tb->page_addr[1] == -1) { - return true; - } else { - tb_page_addr_t phys_page2; - target_ulong virt_page2; - - virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; - phys_page2 = get_page_addr_code(desc->env, virt_page2); - if (tb->page_addr[1] == phys_page2) { - return true; - } - } - } - return false; -} - -TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, uint32_t flags, - uint32_t cflags) -{ - tb_page_addr_t phys_pc; - struct tb_desc desc; - uint32_t h; - - desc.env = cpu->env_ptr; - desc.cs_base = cs_base; - desc.flags = flags; - desc.cflags = cflags; - desc.trace_vcpu_dstate = *cpu->trace_dstate; - desc.pc = pc; - phys_pc = get_page_addr_code(desc.env, pc); - if (phys_pc == -1) { - return NULL; - } - desc.phys_page1 = phys_pc & TARGET_PAGE_MASK; - h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate); - return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp); -} - void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) { if (TCG_TARGET_HAS_direct_jump) { diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 0475ec6007..9f35e3b7a9 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -552,9 +552,6 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs); #endif void tb_flush(CPUState *cpu); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); -TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, uint32_t flags, - uint32_t cflags); void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); /* GETPC is the true target of the return instruction that we'll execute. */ From 97e03465f7dac073434373428388eb6e0998ecea Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 10 Aug 2022 12:04:15 -0700 Subject: [PATCH 11/20] accel/tcg: Move qemu_ram_addr_from_host_nofail to physmem.c The base qemu_ram_addr_from_host function is already in softmmu/physmem.c; move the nofail version to be adjacent. Reviewed-by: Alistair Francis Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 12 ------------ include/exec/cpu-common.h | 1 + softmmu/physmem.c | 12 ++++++++++++ 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 43bd65c973..80a3eb4f1c 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1283,18 +1283,6 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, prot, mmu_idx, size); } -static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) -{ - ram_addr_t ram_addr; - - ram_addr = qemu_ram_addr_from_host(ptr); - if (ram_addr == RAM_ADDR_INVALID) { - error_report("Bad ram pointer %p", ptr); - abort(); - } - return ram_addr; -} - /* * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 2281be4e10..d909429427 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -72,6 +72,7 @@ typedef uintptr_t ram_addr_t; void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); /* This should not be used by devices. */ ram_addr_t qemu_ram_addr_from_host(void *ptr); +ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr); RAMBlock *qemu_ram_block_by_name(const char *name); RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, ram_addr_t *offset); diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 50231bab30..56e03e07b5 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -2443,6 +2443,18 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr) return block->offset + offset; } +ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) +{ + ram_addr_t ram_addr; + + ram_addr = qemu_ram_addr_from_host(ptr); + if (ram_addr == RAM_ADDR_INVALID) { + error_report("Bad ram pointer %p", ptr); + abort(); + } + return ram_addr; +} + static MemTxResult flatview_read(FlatView *fv, hwaddr addr, MemTxAttrs attrs, void *buf, hwaddr len); static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, From 7e0d9973ea665bf459b2dbd173d0e51bc6ca5216 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 10 Aug 2022 14:13:30 -0700 Subject: [PATCH 12/20] accel/tcg: Use probe_access_internal for softmmu get_page_addr_code_hostp Simplify the implementation of get_page_addr_code_hostp by reusing the existing probe_access infrastructure. Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 76 ++++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 50 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 80a3eb4f1c..8fad2d9b83 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1482,56 +1482,6 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ (ADDR) & TARGET_PAGE_MASK) -/* - * Return a ram_addr_t for the virtual address for execution. - * - * Return -1 if we can't translate and execute from an entire page - * of RAM. This will force us to execute by loading and translating - * one insn at a time, without caching. - * - * NOTE: This function will trigger an exception if the page is - * not executable. - */ -tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, - void **hostp) -{ - uintptr_t mmu_idx = cpu_mmu_index(env, true); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - void *p; - - if (unlikely(!tlb_hit(entry->addr_code, addr))) { - if (!VICTIM_TLB_HIT(addr_code, addr)) { - tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - - if (unlikely(entry->addr_code & TLB_INVALID_MASK)) { - /* - * The MMU protection covers a smaller range than a target - * page, so we must redo the MMU check for every insn. - */ - return -1; - } - } - assert(tlb_hit(entry->addr_code, addr)); - } - - if (unlikely(entry->addr_code & TLB_MMIO)) { - /* The region is not backed by RAM. */ - if (hostp) { - *hostp = NULL; - } - return -1; - } - - p = (void *)((uintptr_t)addr + entry->addend); - if (hostp) { - *hostp = p; - } - return qemu_ram_addr_from_host_nofail(p); -} - static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) { @@ -1687,6 +1637,32 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, return flags ? NULL : host; } +/* + * Return a ram_addr_t for the virtual address for execution. + * + * Return -1 if we can't translate and execute from an entire page + * of RAM. This will force us to execute by loading and translating + * one insn at a time, without caching. + * + * NOTE: This function will trigger an exception if the page is + * not executable. + */ +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, + void **hostp) +{ + void *p; + + (void)probe_access_internal(env, addr, 1, MMU_INST_FETCH, + cpu_mmu_index(env, true), false, &p, 0); + if (p == NULL) { + return -1; + } + if (hostp) { + *hostp = p; + } + return qemu_ram_addr_from_host_nofail(p); +} + #ifdef CONFIG_PLUGIN /* * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure. From 9867b30287ec98728e52ed49d03c666e86c90a04 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 22 Aug 2022 18:50:46 -0700 Subject: [PATCH 13/20] accel/tcg: Document the faulting lookup in tb_lookup_cmp It was non-obvious to me why we can raise an exception in the middle of a comparison function, but it works. While nearby, use TARGET_PAGE_ALIGN instead of open-coding. Acked-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 7887af6f45..5f43b9769a 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -198,7 +198,16 @@ static bool tb_lookup_cmp(const void *p, const void *d) tb_page_addr_t phys_page2; target_ulong virt_page2; - virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; + /* + * We know that the first page matched, and an otherwise valid TB + * encountered an incomplete instruction at the end of that page, + * therefore we know that generating a new TB from the current PC + * must also require reading from the next page -- even if the + * second pages do not match, and therefore the resulting insn + * is different for the new TB. Therefore any exception raised + * here by the faulting lookup is not premature. + */ + virt_page2 = TARGET_PAGE_ALIGN(desc->pc); phys_page2 = get_page_addr_code(desc->env, virt_page2); if (tb->page_addr[1] == phys_page2) { return true; From dac8d19bdb3ccaafbcbd2df34135464964232a8f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 10 Aug 2022 21:05:31 -0700 Subject: [PATCH 14/20] accel/tcg: Remove translator_ldsw The only user can easily use translator_lduw and adjust the type to signed during the return. Reviewed-by: Alistair Francis Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- include/exec/translator.h | 1 - target/i386/tcg/translate.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/exec/translator.h b/include/exec/translator.h index 0d0bf3a31e..45b9268ca4 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -178,7 +178,6 @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest); #define FOR_EACH_TRANSLATOR_LD(F) \ F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */) \ - F(translator_ldsw, int16_t, cpu_ldsw_code, bswap16) \ F(translator_lduw, uint16_t, cpu_lduw_code, bswap16) \ F(translator_ldl, uint32_t, cpu_ldl_code, bswap32) \ F(translator_ldq, uint64_t, cpu_ldq_code, bswap64) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index fc081e6ad6..983439d926 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2033,7 +2033,7 @@ static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s) static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s) { - return translator_ldsw(env, &s->base, advance_pc(env, s, 2)); + return translator_lduw(env, &s->base, advance_pc(env, s, 2)); } static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s) From 306c872103b4d0986c9f671eb7538b0b70bf69b5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 11 Aug 2022 13:48:03 -0700 Subject: [PATCH 15/20] accel/tcg: Add pc and host_pc params to gen_intermediate_code Pass these along to translator_loop -- pc may be used instead of tb->pc, and host_pc is currently unused. Adjust all targets at one time. Acked-by: Alistair Francis Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 6 ++++-- accel/tcg/translator.c | 9 +++++---- include/exec/exec-all.h | 1 - include/exec/translator.h | 24 ++++++++++++++++++++---- target/alpha/translate.c | 5 +++-- target/arm/translate.c | 5 +++-- target/avr/translate.c | 5 +++-- target/cris/translate.c | 5 +++-- target/hexagon/translate.c | 6 ++++-- target/hppa/translate.c | 5 +++-- target/i386/tcg/translate.c | 5 +++-- target/loongarch/translate.c | 6 ++++-- target/m68k/translate.c | 5 +++-- target/microblaze/translate.c | 5 +++-- target/mips/tcg/translate.c | 5 +++-- target/nios2/translate.c | 5 +++-- target/openrisc/translate.c | 6 ++++-- target/ppc/translate.c | 5 +++-- target/riscv/translate.c | 5 +++-- target/rx/translate.c | 5 +++-- target/s390x/tcg/translate.c | 5 +++-- target/sh4/translate.c | 5 +++-- target/sparc/translate.c | 5 +++-- target/tricore/translate.c | 6 ++++-- target/xtensa/translate.c | 6 ++++-- 25 files changed, 97 insertions(+), 53 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index b83161a081..587886aa4e 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -46,6 +46,7 @@ #include "exec/cputlb.h" #include "exec/translate-all.h" +#include "exec/translator.h" #include "qemu/bitmap.h" #include "qemu/qemu-print.h" #include "qemu/timer.h" @@ -1392,11 +1393,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu, TCGProfile *prof = &tcg_ctx->prof; int64_t ti; #endif + void *host_pc; assert_memory_lock(); qemu_thread_jit_write(); - phys_pc = get_page_addr_code(env, pc); + phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); if (phys_pc == -1) { /* Generate a one-shot TB with 1 insn in it */ @@ -1444,7 +1446,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_func_start(tcg_ctx); tcg_ctx->cpu = env_cpu(env); - gen_intermediate_code(cpu, tb, max_insns); + gen_intermediate_code(cpu, tb, max_insns, pc, host_pc); assert(tb->size != 0); tcg_ctx->cpu = NULL; max_insns = tb->icount; diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index fe7af9b943..3eef30d93a 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -51,16 +51,17 @@ static inline void translator_page_protect(DisasContextBase *dcbase, #endif } -void translator_loop(const TranslatorOps *ops, DisasContextBase *db, - CPUState *cpu, TranslationBlock *tb, int max_insns) +void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc, + const TranslatorOps *ops, DisasContextBase *db) { uint32_t cflags = tb_cflags(tb); bool plugin_enabled; /* Initialize DisasContext */ db->tb = tb; - db->pc_first = tb->pc; - db->pc_next = db->pc_first; + db->pc_first = pc; + db->pc_next = pc; db->is_jmp = DISAS_NEXT; db->num_insns = 0; db->max_insns = max_insns; diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 9f35e3b7a9..bcad607c4e 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -39,7 +39,6 @@ typedef ram_addr_t tb_page_addr_t; #define TB_PAGE_ADDR_FMT RAM_ADDR_FMT #endif -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns); void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb, target_ulong *data); diff --git a/include/exec/translator.h b/include/exec/translator.h index 45b9268ca4..69db0f5c21 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -26,6 +26,19 @@ #include "exec/translate-all.h" #include "tcg/tcg.h" +/** + * gen_intermediate_code + * @cpu: cpu context + * @tb: translation block + * @max_insns: max number of instructions to translate + * @pc: guest virtual program counter address + * @host_pc: host physical program counter address + * + * This function must be provided by the target, which should create + * the target-specific DisasContext, and then invoke translator_loop. + */ +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc); /** * DisasJumpType: @@ -123,11 +136,13 @@ typedef struct TranslatorOps { /** * translator_loop: - * @ops: Target-specific operations. - * @db: Disassembly context. * @cpu: Target vCPU. * @tb: Translation block. * @max_insns: Maximum number of insns to translate. + * @pc: guest virtual program counter address + * @host_pc: host physical program counter address + * @ops: Target-specific operations. + * @db: Disassembly context. * * Generic translator loop. * @@ -141,8 +156,9 @@ typedef struct TranslatorOps { * - When single-stepping is enabled (system-wide or on the current vCPU). * - When too many instructions have been translated. */ -void translator_loop(const TranslatorOps *ops, DisasContextBase *db, - CPUState *cpu, TranslationBlock *tb, int max_insns); +void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc, + const TranslatorOps *ops, DisasContextBase *db); void translator_loop_temp_check(DisasContextBase *db); diff --git a/target/alpha/translate.c b/target/alpha/translate.c index 9af1627079..6766350f56 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -3043,10 +3043,11 @@ static const TranslatorOps alpha_tr_ops = { .disas_log = alpha_tr_disas_log, }; -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc; - translator_loop(&alpha_tr_ops, &dc.base, cpu, tb, max_insns); + translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base); } void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb, diff --git a/target/arm/translate.c b/target/arm/translate.c index ad617b9948..9474e4b44b 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -9892,7 +9892,8 @@ static const TranslatorOps thumb_translator_ops = { }; /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc = { }; const TranslatorOps *ops = &arm_translator_ops; @@ -9907,7 +9908,7 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) } #endif - translator_loop(ops, &dc.base, cpu, tb, max_insns); + translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base); } void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, diff --git a/target/avr/translate.c b/target/avr/translate.c index 026753c963..e65b6008c0 100644 --- a/target/avr/translate.c +++ b/target/avr/translate.c @@ -3049,10 +3049,11 @@ static const TranslatorOps avr_tr_ops = { .disas_log = avr_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc = { }; - translator_loop(&avr_tr_ops, &dc.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base); } void restore_state_to_opc(CPUAVRState *env, TranslationBlock *tb, diff --git a/target/cris/translate.c b/target/cris/translate.c index ac101344a3..73385b0b3c 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -3286,10 +3286,11 @@ static const TranslatorOps cris_tr_ops = { .disas_log = cris_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc; - translator_loop(&cris_tr_ops, &dc.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &cris_tr_ops, &dc.base); } void cris_cpu_dump_state(CPUState *cs, FILE *f, int flags) diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index d4fc92f7e9..0e8a0772f7 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -850,11 +850,13 @@ static const TranslatorOps hexagon_tr_ops = { .disas_log = hexagon_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext ctx; - translator_loop(&hexagon_tr_ops, &ctx.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, + &hexagon_tr_ops, &ctx.base); } #define NAME_LEN 64 diff --git a/target/hppa/translate.c b/target/hppa/translate.c index b8dbfee5e9..8b861957e0 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -4340,10 +4340,11 @@ static const TranslatorOps hppa_tr_ops = { .disas_log = hppa_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext ctx; - translator_loop(&hppa_tr_ops, &ctx.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base); } void restore_state_to_opc(CPUHPPAState *env, TranslationBlock *tb, diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 983439d926..ba2ab2b45d 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -8821,11 +8821,12 @@ static const TranslatorOps i386_tr_ops = { }; /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc; - translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns); + translator_loop(cpu, tb, max_insns, pc, host_pc, &i386_tr_ops, &dc.base); } void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c index 51ba291430..95b37ea180 100644 --- a/target/loongarch/translate.c +++ b/target/loongarch/translate.c @@ -241,11 +241,13 @@ static const TranslatorOps loongarch_tr_ops = { .disas_log = loongarch_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext ctx; - translator_loop(&loongarch_tr_ops, &ctx.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, + &loongarch_tr_ops, &ctx.base); } void loongarch_translate_init(void) diff --git a/target/m68k/translate.c b/target/m68k/translate.c index 8f3c298ad0..5098f7e570 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -6361,10 +6361,11 @@ static const TranslatorOps m68k_tr_ops = { .disas_log = m68k_tr_disas_log, }; -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc; - translator_loop(&m68k_tr_ops, &dc.base, cpu, tb, max_insns); + translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base); } static double floatx80_to_double(CPUM68KState *env, uint16_t high, uint64_t low) diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index bf01384d33..c5546f93aa 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -1849,10 +1849,11 @@ static const TranslatorOps mb_tr_ops = { .disas_log = mb_tr_disas_log, }; -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc; - translator_loop(&mb_tr_ops, &dc.base, cpu, tb, max_insns); + translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base); } void mb_cpu_dump_state(CPUState *cs, FILE *f, int flags) diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c index de1511baaf..0d936e2648 100644 --- a/target/mips/tcg/translate.c +++ b/target/mips/tcg/translate.c @@ -16155,11 +16155,12 @@ static const TranslatorOps mips_tr_ops = { .disas_log = mips_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext ctx; - translator_loop(&mips_tr_ops, &ctx.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &mips_tr_ops, &ctx.base); } void mips_tcg_init(void) diff --git a/target/nios2/translate.c b/target/nios2/translate.c index 3a037a68cc..c588e8e885 100644 --- a/target/nios2/translate.c +++ b/target/nios2/translate.c @@ -1038,10 +1038,11 @@ static const TranslatorOps nios2_tr_ops = { .disas_log = nios2_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc; - translator_loop(&nios2_tr_ops, &dc.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &nios2_tr_ops, &dc.base); } void nios2_cpu_dump_state(CPUState *cs, FILE *f, int flags) diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c index 7b8ad43d5f..8154f9d744 100644 --- a/target/openrisc/translate.c +++ b/target/openrisc/translate.c @@ -1705,11 +1705,13 @@ static const TranslatorOps openrisc_tr_ops = { .disas_log = openrisc_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext ctx; - translator_loop(&openrisc_tr_ops, &ctx.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, + &openrisc_tr_ops, &ctx.base); } void openrisc_cpu_dump_state(CPUState *cs, FILE *f, int flags) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 388337f81b..000b1e518d 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -7719,11 +7719,12 @@ static const TranslatorOps ppc_tr_ops = { .disas_log = ppc_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext ctx; - translator_loop(&ppc_tr_ops, &ctx.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &ppc_tr_ops, &ctx.base); } void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb, diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 63b04e8a94..38666ddc91 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -1196,11 +1196,12 @@ static const TranslatorOps riscv_tr_ops = { .disas_log = riscv_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext ctx; - translator_loop(&riscv_tr_ops, &ctx.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &riscv_tr_ops, &ctx.base); } void riscv_translate_init(void) diff --git a/target/rx/translate.c b/target/rx/translate.c index 62aee66937..ea5653bc95 100644 --- a/target/rx/translate.c +++ b/target/rx/translate.c @@ -2363,11 +2363,12 @@ static const TranslatorOps rx_tr_ops = { .disas_log = rx_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc; - translator_loop(&rx_tr_ops, &dc.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &rx_tr_ops, &dc.base); } void restore_state_to_opc(CPURXState *env, TranslationBlock *tb, diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index e2ee005671..d4c0b9b3a2 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -6676,11 +6676,12 @@ static const TranslatorOps s390x_tr_ops = { .disas_log = s390x_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc; - translator_loop(&s390x_tr_ops, &dc.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &s390x_tr_ops, &dc.base); } void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb, diff --git a/target/sh4/translate.c b/target/sh4/translate.c index f1b190e7cf..01056571c3 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -2368,11 +2368,12 @@ static const TranslatorOps sh4_tr_ops = { .disas_log = sh4_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext ctx; - translator_loop(&sh4_tr_ops, &ctx.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &sh4_tr_ops, &ctx.base); } void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb, diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 2e28222d31..2cbbe2396a 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -5917,11 +5917,12 @@ static const TranslatorOps sparc_tr_ops = { .disas_log = sparc_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc = {}; - translator_loop(&sparc_tr_ops, &dc.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, &sparc_tr_ops, &dc.base); } void sparc_tcg_init(void) diff --git a/target/tricore/translate.c b/target/tricore/translate.c index d170500fa5..a0558ead71 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -8878,10 +8878,12 @@ static const TranslatorOps tricore_tr_ops = { }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext ctx; - translator_loop(&tricore_tr_ops, &ctx.base, cs, tb, max_insns); + translator_loop(cs, tb, max_insns, pc, host_pc, + &tricore_tr_ops, &ctx.base); } void diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c index 70e11eeb45..8b864ef925 100644 --- a/target/xtensa/translate.c +++ b/target/xtensa/translate.c @@ -1279,10 +1279,12 @@ static const TranslatorOps xtensa_translator_ops = { .disas_log = xtensa_tr_disas_log, }; -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns, + target_ulong pc, void *host_pc) { DisasContext dc = {}; - translator_loop(&xtensa_translator_ops, &dc.base, cpu, tb, max_insns); + translator_loop(cpu, tb, max_insns, pc, host_pc, + &xtensa_translator_ops, &dc.base); } void xtensa_cpu_dump_state(CPUState *cs, FILE *f, int flags) From 50627f1b7b1b1db60166a670fbc17623c7d7243e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 10 Aug 2022 21:39:29 -0700 Subject: [PATCH 16/20] accel/tcg: Add fast path for translator_ld* Cache the translation from guest to host address, so we may use direct loads when we hit on the primary translation page. Look up the second translation page only once, during translation. This obviates another lookup of the second page within tb_gen_code after translation. Fixes a bug in that plugin_insn_append should be passed the bytes in the original memory order, not bswapped by pieces. Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 23 +++---- accel/tcg/translator.c | 126 +++++++++++++++++++++++++++++--------- include/exec/translator.h | 63 +++++++++++-------- 3 files changed, 141 insertions(+), 71 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 587886aa4e..f5e8592d4a 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1385,8 +1385,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, { CPUArchState *env = cpu->env_ptr; TranslationBlock *tb, *existing_tb; - tb_page_addr_t phys_pc, phys_page2; - target_ulong virt_page2; + tb_page_addr_t phys_pc; tcg_insn_unit *gen_code_buf; int gen_code_size, search_size, max_insns; #ifdef CONFIG_PROFILER @@ -1429,6 +1428,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->flags = flags; tb->cflags = cflags; tb->trace_vcpu_dstate = *cpu->trace_dstate; + tb->page_addr[0] = phys_pc; + tb->page_addr[1] = -1; tcg_ctx->tb_cflags = cflags; tb_overflow: @@ -1622,13 +1623,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } /* - * If the TB is not associated with a physical RAM page then - * it must be a temporary one-insn TB, and we have nothing to do - * except fill in the page_addr[] fields. Return early before - * attempting to link to other TBs or add to the lookup table. + * If the TB is not associated with a physical RAM page then it must be + * a temporary one-insn TB, and we have nothing left to do. Return early + * before attempting to link to other TBs or add to the lookup table. */ - if (phys_pc == -1) { - tb->page_addr[0] = tb->page_addr[1] = -1; + if (tb->page_addr[0] == -1) { return tb; } @@ -1639,17 +1638,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu, */ tcg_tb_insert(tb); - /* check next page if needed */ - virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK; - phys_page2 = -1; - if ((pc & TARGET_PAGE_MASK) != virt_page2) { - phys_page2 = get_page_addr_code(env, virt_page2); - } /* * No explicit memory barrier is required -- tb_link_page() makes the * TB visible in a consistent state. */ - existing_tb = tb_link_page(tb, phys_pc, phys_page2); + existing_tb = tb_link_page(tb, tb->page_addr[0], tb->page_addr[1]); /* if the TB already exists, discard what we just translated */ if (unlikely(existing_tb != tb)) { uintptr_t orig_aligned = (uintptr_t)gen_code_buf; diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 3eef30d93a..ca8a5f2d83 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -42,15 +42,6 @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0; } -static inline void translator_page_protect(DisasContextBase *dcbase, - target_ulong pc) -{ -#ifdef CONFIG_USER_ONLY - dcbase->page_protect_end = pc | ~TARGET_PAGE_MASK; - page_protect(pc); -#endif -} - void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns, target_ulong pc, void *host_pc, const TranslatorOps *ops, DisasContextBase *db) @@ -66,7 +57,12 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns, db->num_insns = 0; db->max_insns = max_insns; db->singlestep_enabled = cflags & CF_SINGLE_STEP; - translator_page_protect(db, db->pc_next); + db->host_addr[0] = host_pc; + db->host_addr[1] = NULL; + +#ifdef CONFIG_USER_ONLY + page_protect(pc); +#endif ops->init_disas_context(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ @@ -151,31 +147,103 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns, #endif } -static inline void translator_maybe_page_protect(DisasContextBase *dcbase, - target_ulong pc, size_t len) +static void *translator_access(CPUArchState *env, DisasContextBase *db, + target_ulong pc, size_t len) { -#ifdef CONFIG_USER_ONLY - target_ulong end = pc + len - 1; + void *host; + target_ulong base, end; + TranslationBlock *tb; - if (end > dcbase->page_protect_end) { - translator_page_protect(dcbase, end); + tb = db->tb; + + /* Use slow path if first page is MMIO. */ + if (unlikely(tb->page_addr[0] == -1)) { + return NULL; } + + end = pc + len - 1; + if (likely(is_same_page(db, end))) { + host = db->host_addr[0]; + base = db->pc_first; + } else { + host = db->host_addr[1]; + base = TARGET_PAGE_ALIGN(db->pc_first); + if (host == NULL) { + tb->page_addr[1] = + get_page_addr_code_hostp(env, base, &db->host_addr[1]); +#ifdef CONFIG_USER_ONLY + page_protect(end); #endif + /* We cannot handle MMIO as second page. */ + assert(tb->page_addr[1] != -1); + host = db->host_addr[1]; + } + + /* Use slow path when crossing pages. */ + if (is_same_page(db, pc)) { + return NULL; + } + } + + tcg_debug_assert(pc >= base); + return host + (pc - base); } -#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \ - type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \ - abi_ptr pc, bool do_swap) \ - { \ - translator_maybe_page_protect(dcbase, pc, sizeof(type)); \ - type ret = load_fn(env, pc); \ - if (do_swap) { \ - ret = swap_fn(ret); \ - } \ - plugin_insn_append(pc, &ret, sizeof(ret)); \ - return ret; \ +uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc) +{ + uint8_t ret; + void *p = translator_access(env, db, pc, sizeof(ret)); + + if (p) { + plugin_insn_append(pc, p, sizeof(ret)); + return ldub_p(p); } + ret = cpu_ldub_code(env, pc); + plugin_insn_append(pc, &ret, sizeof(ret)); + return ret; +} -FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD) +uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc) +{ + uint16_t ret, plug; + void *p = translator_access(env, db, pc, sizeof(ret)); -#undef GEN_TRANSLATOR_LD + if (p) { + plugin_insn_append(pc, p, sizeof(ret)); + return lduw_p(p); + } + ret = cpu_lduw_code(env, pc); + plug = tswap16(ret); + plugin_insn_append(pc, &plug, sizeof(ret)); + return ret; +} + +uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc) +{ + uint32_t ret, plug; + void *p = translator_access(env, db, pc, sizeof(ret)); + + if (p) { + plugin_insn_append(pc, p, sizeof(ret)); + return ldl_p(p); + } + ret = cpu_ldl_code(env, pc); + plug = tswap32(ret); + plugin_insn_append(pc, &plug, sizeof(ret)); + return ret; +} + +uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc) +{ + uint64_t ret, plug; + void *p = translator_access(env, db, pc, sizeof(ret)); + + if (p) { + plugin_insn_append(pc, p, sizeof(ret)); + return ldq_p(p); + } + ret = cpu_ldq_code(env, pc); + plug = tswap64(ret); + plugin_insn_append(pc, &plug, sizeof(ret)); + return ret; +} diff --git a/include/exec/translator.h b/include/exec/translator.h index 69db0f5c21..3b77f5f4aa 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -81,24 +81,14 @@ typedef enum DisasJumpType { * Architecture-agnostic disassembly context. */ typedef struct DisasContextBase { - const TranslationBlock *tb; + TranslationBlock *tb; target_ulong pc_first; target_ulong pc_next; DisasJumpType is_jmp; int num_insns; int max_insns; bool singlestep_enabled; -#ifdef CONFIG_USER_ONLY - /* - * Guest address of the last byte of the last protected page. - * - * Pages containing the translated instructions are made non-writable in - * order to achieve consistency in case another thread is modifying the - * code while translate_insn() fetches the instruction bytes piecemeal. - * Such writer threads are blocked on mmap_lock() in page_unprotect(). - */ - target_ulong page_protect_end; -#endif + void *host_addr[2]; } DisasContextBase; /** @@ -183,24 +173,43 @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest); * the relevant information at translation time. */ -#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \ - type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \ - abi_ptr pc, bool do_swap); \ - static inline type fullname(CPUArchState *env, \ - DisasContextBase *dcbase, abi_ptr pc) \ - { \ - return fullname ## _swap(env, dcbase, pc, false); \ +uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc); +uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc); +uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc); +uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc); + +static inline uint16_t +translator_lduw_swap(CPUArchState *env, DisasContextBase *db, + abi_ptr pc, bool do_swap) +{ + uint16_t ret = translator_lduw(env, db, pc); + if (do_swap) { + ret = bswap16(ret); } + return ret; +} -#define FOR_EACH_TRANSLATOR_LD(F) \ - F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */) \ - F(translator_lduw, uint16_t, cpu_lduw_code, bswap16) \ - F(translator_ldl, uint32_t, cpu_ldl_code, bswap32) \ - F(translator_ldq, uint64_t, cpu_ldq_code, bswap64) +static inline uint32_t +translator_ldl_swap(CPUArchState *env, DisasContextBase *db, + abi_ptr pc, bool do_swap) +{ + uint32_t ret = translator_ldl(env, db, pc); + if (do_swap) { + ret = bswap32(ret); + } + return ret; +} -FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD) - -#undef GEN_TRANSLATOR_LD +static inline uint64_t +translator_ldq_swap(CPUArchState *env, DisasContextBase *db, + abi_ptr pc, bool do_swap) +{ + uint64_t ret = translator_ldq(env, db, pc); + if (do_swap) { + ret = bswap64(ret); + } + return ret; +} /* * Return whether addr is on the same page as where disassembly started. From ab12c95d3f1999648d70bca54ebcc0588a07dd3e Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 17 Aug 2022 17:05:04 +0200 Subject: [PATCH 17/20] target/s390x: Make translator stop before the end of a page Right now translator stops right *after* the end of a page, which breaks reporting of fault locations when the last instruction of a multi-insn translation block crosses a page boundary. Signed-off-by: Ilya Leoshkevich Reviewed-by: Richard Henderson Message-Id: <20220817150506.592862-3-iii@linux.ibm.com> Signed-off-by: Richard Henderson --- target/s390x/tcg/translate.c | 15 +++- tests/tcg/multiarch/noexec.c.inc | 139 +++++++++++++++++++++++++++++++ tests/tcg/s390x/Makefile.target | 1 + tests/tcg/s390x/noexec.c | 106 +++++++++++++++++++++++ 4 files changed, 257 insertions(+), 4 deletions(-) create mode 100644 tests/tcg/multiarch/noexec.c.inc create mode 100644 tests/tcg/s390x/noexec.c diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index d4c0b9b3a2..1d2dddab1c 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -6609,6 +6609,14 @@ static void s390x_tr_insn_start(DisasContextBase *dcbase, CPUState *cs) dc->insn_start = tcg_last_op(); } +static target_ulong get_next_pc(CPUS390XState *env, DisasContext *s, + uint64_t pc) +{ + uint64_t insn = ld_code2(env, s, pc); + + return pc + get_ilen((insn >> 8) & 0xff); +} + static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) { CPUS390XState *env = cs->env_ptr; @@ -6616,10 +6624,9 @@ static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) dc->base.is_jmp = translate_one(env, dc); if (dc->base.is_jmp == DISAS_NEXT) { - uint64_t page_start; - - page_start = dc->base.pc_first & TARGET_PAGE_MASK; - if (dc->base.pc_next - page_start >= TARGET_PAGE_SIZE || dc->ex_value) { + if (!is_same_page(dcbase, dc->base.pc_next) || + !is_same_page(dcbase, get_next_pc(env, dc, dc->base.pc_next)) || + dc->ex_value) { dc->base.is_jmp = DISAS_TOO_MANY; } } diff --git a/tests/tcg/multiarch/noexec.c.inc b/tests/tcg/multiarch/noexec.c.inc new file mode 100644 index 0000000000..2ef539b721 --- /dev/null +++ b/tests/tcg/multiarch/noexec.c.inc @@ -0,0 +1,139 @@ +/* + * Common code for arch-specific MMU_INST_FETCH fault testing. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Forward declarations. */ + +static void *arch_mcontext_pc(const mcontext_t *ctx); +static int arch_mcontext_arg(const mcontext_t *ctx); +static void arch_flush(void *p, int len); + +/* Testing infrastructure. */ + +struct noexec_test { + const char *name; + const char *test_code; + int test_len; + int page_ofs; + int entry_ofs; + int expected_si_ofs; + int expected_pc_ofs; + int expected_arg; +}; + +static void *page_base; +static int page_size; +static const struct noexec_test *current_noexec_test; + +static void handle_err(const char *syscall) +{ + printf("[ FAILED ] %s: %s\n", syscall, strerror(errno)); + exit(EXIT_FAILURE); +} + +static void handle_segv(int sig, siginfo_t *info, void *ucontext) +{ + const struct noexec_test *test = current_noexec_test; + const mcontext_t *mc = &((ucontext_t *)ucontext)->uc_mcontext; + void *expected_si; + void *expected_pc; + void *pc; + int arg; + + if (test == NULL) { + printf("[ FAILED ] unexpected SEGV\n"); + exit(EXIT_FAILURE); + } + current_noexec_test = NULL; + + expected_si = page_base + test->expected_si_ofs; + if (info->si_addr != expected_si) { + printf("[ FAILED ] wrong si_addr (%p != %p)\n", + info->si_addr, expected_si); + exit(EXIT_FAILURE); + } + + pc = arch_mcontext_pc(mc); + expected_pc = page_base + test->expected_pc_ofs; + if (pc != expected_pc) { + printf("[ FAILED ] wrong pc (%p != %p)\n", pc, expected_pc); + exit(EXIT_FAILURE); + } + + arg = arch_mcontext_arg(mc); + if (arg != test->expected_arg) { + printf("[ FAILED ] wrong arg (%d != %d)\n", arg, test->expected_arg); + exit(EXIT_FAILURE); + } + + if (mprotect(page_base, page_size, + PROT_READ | PROT_WRITE | PROT_EXEC) < 0) { + handle_err("mprotect"); + } +} + +static void test_noexec_1(const struct noexec_test *test) +{ + void *start = page_base + test->page_ofs; + void (*fn)(int arg) = page_base + test->entry_ofs; + + memcpy(start, test->test_code, test->test_len); + arch_flush(start, test->test_len); + + /* Trigger TB creation in order to test invalidation. */ + fn(0); + + if (mprotect(page_base, page_size, PROT_NONE) < 0) { + handle_err("mprotect"); + } + + /* Trigger SEGV and check that handle_segv() ran. */ + current_noexec_test = test; + fn(0); + assert(current_noexec_test == NULL); +} + +static int test_noexec(struct noexec_test *tests, size_t n_tests) +{ + struct sigaction act; + size_t i; + + memset(&act, 0, sizeof(act)); + act.sa_sigaction = handle_segv; + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &act, NULL) < 0) { + handle_err("sigaction"); + } + + page_size = getpagesize(); + page_base = mmap(NULL, 2 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (page_base == MAP_FAILED) { + handle_err("mmap"); + } + page_base += page_size; + + for (i = 0; i < n_tests; i++) { + struct noexec_test *test = &tests[i]; + + printf("[ RUN ] %s\n", test->name); + test_noexec_1(test); + printf("[ OK ]\n"); + } + + printf("[ PASSED ]\n"); + return EXIT_SUCCESS; +} diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index 1a7a4a2f59..5e13a41c3f 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -16,6 +16,7 @@ TESTS+=shift TESTS+=trap TESTS+=signals-s390x TESTS+=branch-relative-long +TESTS+=noexec Z14_TESTS=vfminmax vfminmax: LDFLAGS+=-lm diff --git a/tests/tcg/s390x/noexec.c b/tests/tcg/s390x/noexec.c new file mode 100644 index 0000000000..15d007d07f --- /dev/null +++ b/tests/tcg/s390x/noexec.c @@ -0,0 +1,106 @@ +#include "../multiarch/noexec.c.inc" + +static void *arch_mcontext_pc(const mcontext_t *ctx) +{ + return (void *)ctx->psw.addr; +} + +static int arch_mcontext_arg(const mcontext_t *ctx) +{ + return ctx->gregs[2]; +} + +static void arch_flush(void *p, int len) +{ +} + +extern char noexec_1[]; +extern char noexec_2[]; +extern char noexec_end[]; + +asm("noexec_1:\n" + " lgfi %r2,1\n" /* %r2 is 0 on entry, set 1. */ + "noexec_2:\n" + " lgfi %r2,2\n" /* %r2 is 0/1; set 2. */ + " br %r14\n" /* return */ + "noexec_end:"); + +extern char exrl_1[]; +extern char exrl_2[]; +extern char exrl_end[]; + +asm("exrl_1:\n" + " exrl %r0, exrl_2\n" + " br %r14\n" + "exrl_2:\n" + " lgfi %r2,2\n" + "exrl_end:"); + +int main(void) +{ + struct noexec_test noexec_tests[] = { + { + .name = "fallthrough", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2, + .entry_ofs = noexec_1 - noexec_2, + .expected_si_ofs = 0, + .expected_pc_ofs = 0, + .expected_arg = 1, + }, + { + .name = "jump", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2, + .entry_ofs = 0, + .expected_si_ofs = 0, + .expected_pc_ofs = 0, + .expected_arg = 0, + }, + { + .name = "exrl", + .test_code = exrl_1, + .test_len = exrl_end - exrl_1, + .page_ofs = exrl_1 - exrl_2, + .entry_ofs = exrl_1 - exrl_2, + .expected_si_ofs = 0, + .expected_pc_ofs = exrl_1 - exrl_2, + .expected_arg = 0, + }, + { + .name = "fallthrough [cross]", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2 - 2, + .entry_ofs = noexec_1 - noexec_2 - 2, + .expected_si_ofs = 0, + .expected_pc_ofs = -2, + .expected_arg = 1, + }, + { + .name = "jump [cross]", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2 - 2, + .entry_ofs = -2, + .expected_si_ofs = 0, + .expected_pc_ofs = -2, + .expected_arg = 0, + }, + { + .name = "exrl [cross]", + .test_code = exrl_1, + .test_len = exrl_end - exrl_1, + .page_ofs = exrl_1 - exrl_2 - 2, + .entry_ofs = exrl_1 - exrl_2 - 2, + .expected_si_ofs = 0, + .expected_pc_ofs = exrl_1 - exrl_2 - 2, + .expected_arg = 0, + }, + }; + + return test_noexec(noexec_tests, + sizeof(noexec_tests) / sizeof(noexec_tests[0])); +} From 950936681f322a5ba2813f83eb44dd972be2d5a3 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 17 Aug 2022 17:05:05 +0200 Subject: [PATCH 18/20] target/i386: Make translator stop before the end of a page Right now translator stops right *after* the end of a page, which breaks reporting of fault locations when the last instruction of a multi-insn translation block crosses a page boundary. An implementation, like the one arm and s390x have, would require an i386 length disassembler, which is burdensome to maintain. Another alternative would be to single-step at the end of a guest page, but this may come with a performance impact. Fix by snapshotting disassembly state and restoring it after we figure out we crossed a page boundary. This includes rolling back cc_op updates and emitted ops. Signed-off-by: Ilya Leoshkevich Reviewed-by: Richard Henderson Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1143 Message-Id: <20220817150506.592862-4-iii@linux.ibm.com> [rth: Simplify end-of-insn cross-page checks.] Signed-off-by: Richard Henderson --- target/i386/tcg/translate.c | 64 ++++++++++++++++----------- tests/tcg/x86_64/Makefile.target | 3 +- tests/tcg/x86_64/noexec.c | 75 ++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 26 deletions(-) create mode 100644 tests/tcg/x86_64/noexec.c diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index ba2ab2b45d..d6420df31d 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -130,6 +130,7 @@ typedef struct DisasContext { TCGv_i64 tmp1_i64; sigjmp_buf jmpbuf; + TCGOp *prev_insn_end; } DisasContext; /* The environment in which user-only runs is constrained. */ @@ -2008,6 +2009,12 @@ static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes) { uint64_t pc = s->pc; + /* This is a subsequent insn that crosses a page boundary. */ + if (s->base.num_insns > 1 && + !is_same_page(&s->base, s->pc + num_bytes - 1)) { + siglongjmp(s->jmpbuf, 2); + } + s->pc += num_bytes; if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) { /* If the instruction's 16th byte is on a different page than the 1st, a @@ -4669,6 +4676,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) int modrm, reg, rm, mod, op, opreg, val; target_ulong next_eip, tval; target_ulong pc_start = s->base.pc_next; + bool orig_cc_op_dirty = s->cc_op_dirty; + CCOp orig_cc_op = s->cc_op; s->pc_start = s->pc = pc_start; s->override = -1; @@ -4681,9 +4690,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) s->rip_offset = 0; /* for relative ip address */ s->vex_l = 0; s->vex_v = 0; - if (sigsetjmp(s->jmpbuf, 0) != 0) { + switch (sigsetjmp(s->jmpbuf, 0)) { + case 0: + break; + case 1: gen_exception_gpf(s); return s->pc; + case 2: + /* Restore state that may affect the next instruction. */ + s->cc_op_dirty = orig_cc_op_dirty; + s->cc_op = orig_cc_op; + s->base.num_insns--; + tcg_remove_ops_after(s->prev_insn_end); + s->base.is_jmp = DISAS_TOO_MANY; + return pc_start; + default: + g_assert_not_reached(); } prefixes = 0; @@ -8745,6 +8767,7 @@ static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); + dc->prev_insn_end = tcg_last_op(); tcg_gen_insn_start(dc->base.pc_next, dc->cc_op); } @@ -8765,31 +8788,22 @@ static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) #endif pc_next = disas_insn(dc, cpu); - - if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) { - /* if single step mode, we generate only one instruction and - generate an exception */ - /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear - the flag and abort the translation to give the irqs a - chance to happen */ - dc->base.is_jmp = DISAS_TOO_MANY; - } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT) - && ((pc_next & TARGET_PAGE_MASK) - != ((pc_next + TARGET_MAX_INSN_SIZE - 1) - & TARGET_PAGE_MASK) - || (pc_next & ~TARGET_PAGE_MASK) == 0)) { - /* Do not cross the boundary of the pages in icount mode, - it can cause an exception. Do it only when boundary is - crossed by the first instruction in the block. - If current instruction already crossed the bound - it's ok, - because an exception hasn't stopped this code. - */ - dc->base.is_jmp = DISAS_TOO_MANY; - } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) { - dc->base.is_jmp = DISAS_TOO_MANY; - } - dc->base.pc_next = pc_next; + + if (dc->base.is_jmp == DISAS_NEXT) { + if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) { + /* + * If single step mode, we generate only one instruction and + * generate an exception. + * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear + * the flag and abort the translation to give the irqs a + * chance to happen. + */ + dc->base.is_jmp = DISAS_TOO_MANY; + } else if (!is_same_page(&dc->base, pc_next)) { + dc->base.is_jmp = DISAS_TOO_MANY; + } + } } static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target index 6177fd845a..861a0966f4 100644 --- a/tests/tcg/x86_64/Makefile.target +++ b/tests/tcg/x86_64/Makefile.target @@ -10,6 +10,7 @@ include $(SRC_PATH)/tests/tcg/i386/Makefile.target ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET)) X86_64_TESTS += vsyscall +X86_64_TESTS += noexec TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64 else TESTS=$(MULTIARCH_TESTS) @@ -23,5 +24,5 @@ test-x86_64: LDFLAGS+=-lm -lc test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) -vsyscall: $(SRC_PATH)/tests/tcg/x86_64/vsyscall.c +%: $(SRC_PATH)/tests/tcg/x86_64/%.c $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) diff --git a/tests/tcg/x86_64/noexec.c b/tests/tcg/x86_64/noexec.c new file mode 100644 index 0000000000..9b124901be --- /dev/null +++ b/tests/tcg/x86_64/noexec.c @@ -0,0 +1,75 @@ +#include "../multiarch/noexec.c.inc" + +static void *arch_mcontext_pc(const mcontext_t *ctx) +{ + return (void *)ctx->gregs[REG_RIP]; +} + +int arch_mcontext_arg(const mcontext_t *ctx) +{ + return ctx->gregs[REG_RDI]; +} + +static void arch_flush(void *p, int len) +{ +} + +extern char noexec_1[]; +extern char noexec_2[]; +extern char noexec_end[]; + +asm("noexec_1:\n" + " movq $1,%rdi\n" /* %rdi is 0 on entry, set 1. */ + "noexec_2:\n" + " movq $2,%rdi\n" /* %rdi is 0/1; set 2. */ + " ret\n" + "noexec_end:"); + +int main(void) +{ + struct noexec_test noexec_tests[] = { + { + .name = "fallthrough", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2, + .entry_ofs = noexec_1 - noexec_2, + .expected_si_ofs = 0, + .expected_pc_ofs = 0, + .expected_arg = 1, + }, + { + .name = "jump", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2, + .entry_ofs = 0, + .expected_si_ofs = 0, + .expected_pc_ofs = 0, + .expected_arg = 0, + }, + { + .name = "fallthrough [cross]", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2 - 2, + .entry_ofs = noexec_1 - noexec_2 - 2, + .expected_si_ofs = 0, + .expected_pc_ofs = -2, + .expected_arg = 1, + }, + { + .name = "jump [cross]", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2 - 2, + .entry_ofs = -2, + .expected_si_ofs = 0, + .expected_pc_ofs = -2, + .expected_arg = 0, + }, + }; + + return test_noexec(noexec_tests, + sizeof(noexec_tests) / sizeof(noexec_tests[0])); +} From ef6e987b64343e0dc9e8526c80ee8d27d0654549 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 18 Aug 2022 12:07:28 -0700 Subject: [PATCH 19/20] target/riscv: Add MAX_INSN_LEN and insn_len These will be useful in properly ending the TB. Reviewed-by: Alistair Francis Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- target/riscv/translate.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 38666ddc91..a719aa6e63 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -1022,6 +1022,14 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) /* Include decoders for factored-out extensions */ #include "decode-XVentanaCondOps.c.inc" +/* The specification allows for longer insns, but not supported by qemu. */ +#define MAX_INSN_LEN 4 + +static inline int insn_len(uint16_t first_word) +{ + return (first_word & 3) == 3 ? 4 : 2; +} + static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) { /* @@ -1037,7 +1045,7 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) }; /* Check for compressed insn */ - if (extract16(opcode, 0, 2) != 3) { + if (insn_len(opcode) == 2) { if (!has_ext(ctx, RVC)) { gen_exception_illegal(ctx); } else { From 00c07344fa245b22e895b363320ba4cd0ec1088a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 18 Aug 2022 18:00:30 -0700 Subject: [PATCH 20/20] target/riscv: Make translator stop before the end of a page Right now the translator stops right *after* the end of a page, which breaks reporting of fault locations when the last instruction of a multi-insn translation block crosses a page boundary. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1155 Reviewed-by: Alistair Francis Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- target/riscv/translate.c | 17 +++++-- tests/tcg/riscv64/Makefile.target | 1 + tests/tcg/riscv64/noexec.c | 79 +++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 tests/tcg/riscv64/noexec.c diff --git a/target/riscv/translate.c b/target/riscv/translate.c index a719aa6e63..f8af6daa70 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -1154,12 +1154,21 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) } ctx->nftemp = 0; + /* Only the first insn within a TB is allowed to cross a page boundary. */ if (ctx->base.is_jmp == DISAS_NEXT) { - target_ulong page_start; - - page_start = ctx->base.pc_first & TARGET_PAGE_MASK; - if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE) { + if (!is_same_page(&ctx->base, ctx->base.pc_next)) { ctx->base.is_jmp = DISAS_TOO_MANY; + } else { + unsigned page_ofs = ctx->base.pc_next & ~TARGET_PAGE_MASK; + + if (page_ofs > TARGET_PAGE_SIZE - MAX_INSN_LEN) { + uint16_t next_insn = cpu_lduw_code(env, ctx->base.pc_next); + int len = insn_len(next_insn); + + if (!is_same_page(&ctx->base, ctx->base.pc_next + len)) { + ctx->base.is_jmp = DISAS_TOO_MANY; + } + } } } } diff --git a/tests/tcg/riscv64/Makefile.target b/tests/tcg/riscv64/Makefile.target index d41bf6d60d..b5b89dfb0e 100644 --- a/tests/tcg/riscv64/Makefile.target +++ b/tests/tcg/riscv64/Makefile.target @@ -3,3 +3,4 @@ VPATH += $(SRC_PATH)/tests/tcg/riscv64 TESTS += test-div +TESTS += noexec diff --git a/tests/tcg/riscv64/noexec.c b/tests/tcg/riscv64/noexec.c new file mode 100644 index 0000000000..86f64b28db --- /dev/null +++ b/tests/tcg/riscv64/noexec.c @@ -0,0 +1,79 @@ +#include "../multiarch/noexec.c.inc" + +static void *arch_mcontext_pc(const mcontext_t *ctx) +{ + return (void *)ctx->__gregs[REG_PC]; +} + +static int arch_mcontext_arg(const mcontext_t *ctx) +{ + return ctx->__gregs[REG_A0]; +} + +static void arch_flush(void *p, int len) +{ + __builtin___clear_cache(p, p + len); +} + +extern char noexec_1[]; +extern char noexec_2[]; +extern char noexec_end[]; + +asm(".option push\n" + ".option norvc\n" + "noexec_1:\n" + " li a0,1\n" /* a0 is 0 on entry, set 1. */ + "noexec_2:\n" + " li a0,2\n" /* a0 is 0/1; set 2. */ + " ret\n" + "noexec_end:\n" + ".option pop"); + +int main(void) +{ + struct noexec_test noexec_tests[] = { + { + .name = "fallthrough", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2, + .entry_ofs = noexec_1 - noexec_2, + .expected_si_ofs = 0, + .expected_pc_ofs = 0, + .expected_arg = 1, + }, + { + .name = "jump", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2, + .entry_ofs = 0, + .expected_si_ofs = 0, + .expected_pc_ofs = 0, + .expected_arg = 0, + }, + { + .name = "fallthrough [cross]", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2 - 2, + .entry_ofs = noexec_1 - noexec_2 - 2, + .expected_si_ofs = 0, + .expected_pc_ofs = -2, + .expected_arg = 1, + }, + { + .name = "jump [cross]", + .test_code = noexec_1, + .test_len = noexec_end - noexec_1, + .page_ofs = noexec_1 - noexec_2 - 2, + .entry_ofs = -2, + .expected_si_ofs = 0, + .expected_pc_ofs = -2, + .expected_arg = 0, + }, + }; + + return test_noexec(noexec_tests, + sizeof(noexec_tests) / sizeof(noexec_tests[0])); +}