From a0f06a6226c463137acc5c187aef2c268f389282 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 May 2023 15:07:47 +0100 Subject: [PATCH 01/53] target/m68k: Fix gen_load_fp for OS_LONG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Case was accidentally dropped in b7a94da9550b. Tested-by: Laurent Vivier Reviewed-by: Laurent Vivier Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/m68k/translate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/m68k/translate.c b/target/m68k/translate.c index 744eb3748b..44d852b106 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -959,6 +959,7 @@ static void gen_load_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp, switch (opsize) { case OS_BYTE: case OS_WORD: + case OS_LONG: tcg_gen_qemu_ld_tl(tmp, addr, index, opsize | MO_SIGN | MO_TE); gen_helper_exts32(cpu_env, fp, tmp); break; From 8c313254e61ed47a1bf4a2db714b25cdd94fbcce Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 5 May 2023 21:40:49 +0100 Subject: [PATCH 02/53] accel/tcg: Fix atomic_mmu_lookup for reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A copy-paste bug had us looking at the victim cache for writes. Cc: qemu-stable@nongnu.org Reported-by: Peter Maydell Signed-off-by: Richard Henderson Fixes: 08dff435e2 ("tcg: Probe the proper permissions for atomic ops") Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Maydell Message-Id: <20230505204049.352469-1-richard.henderson@linaro.org> --- accel/tcg/cputlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 3117886af1..0b8a5f93d2 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1835,7 +1835,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, } else /* if (prot & PAGE_READ) */ { tlb_addr = tlbe->addr_read; if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { + if (!VICTIM_TLB_HIT(addr_read, addr)) { tlb_fill(env_cpu(env), addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); index = tlb_index(env, mmu_idx, addr); From 692aba8d769585a6cd9e37c101af73dbd1482f7f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 10 May 2023 18:04:45 +0100 Subject: [PATCH 03/53] disas: Fix tabs and braces in disas.c Fix these before moving the file, for checkpatch.pl. 
Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-Id: <20230510170812.663149-1-richard.henderson@linaro.org> --- disas.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/disas.c b/disas.c index b087c12c47..d46f638a72 100644 --- a/disas.c +++ b/disas.c @@ -226,11 +226,12 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code, } for (pc = code; size > 0; pc += count, size -= count) { - fprintf(out, "0x" TARGET_FMT_lx ": ", pc); - count = s.info.print_insn(pc, &s.info); - fprintf(out, "\n"); - if (count < 0) - break; + fprintf(out, "0x" TARGET_FMT_lx ": ", pc); + count = s.info.print_insn(pc, &s.info); + fprintf(out, "\n"); + if (count < 0) { + break; + } if (size < count) { fprintf(out, "Disassembler disagrees with translator over instruction " From f779026478773da05e3f5b4621dddc5c6d6542dc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 3 May 2023 08:23:26 +0100 Subject: [PATCH 04/53] disas: Move disas.c to disas/ Reviewed-by: Thomas Huth Signed-off-by: Richard Henderson Message-Id: <20230503072331.1747057-80-richard.henderson@linaro.org> --- disas.c => disas/disas.c | 0 disas/meson.build | 4 +++- meson.build | 3 --- 3 files changed, 3 insertions(+), 4 deletions(-) rename disas.c => disas/disas.c (100%) diff --git a/disas.c b/disas/disas.c similarity index 100% rename from disas.c rename to disas/disas.c diff --git a/disas/meson.build b/disas/meson.build index c865bdd882..cbf6315f25 100644 --- a/disas/meson.build +++ b/disas/meson.build @@ -10,4 +10,6 @@ common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files('riscv.c')) common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c')) common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c')) common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c')) -common_ss.add(when: capstone, if_true: files('capstone.c')) +common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone]) + +specific_ss.add(files('disas.c'), capstone) diff --git a/meson.build b/meson.build index 5c7af6f3bc..d3cf48960b 100644 --- a/meson.build +++ b/meson.build @@ -3153,9 +3153,6 @@ specific_ss.add(files('cpu.c')) subdir('softmmu') -common_ss.add(capstone) -specific_ss.add(files('disas.c'), capstone) - # Work around a gcc bug/misfeature wherein constant propagation looks # through an alias: # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99696 From b6235a759a4552d21c5b68d16c894aa5b96d4b96 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 3 May 2023 08:23:27 +0100 Subject: [PATCH 05/53] disas: Remove target_ulong from the interface Use uint64_t for the pc, and size_t for the size. 
Reviewed-by: Thomas Huth Signed-off-by: Richard Henderson Message-Id: <20230503072331.1747057-81-richard.henderson@linaro.org> --- bsd-user/elfload.c | 5 +++-- disas/disas.c | 19 +++++++++---------- include/disas/disas.h | 17 ++++++----------- linux-user/elfload.c | 5 +++-- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c index fbcdc94b96..1f650bdde8 100644 --- a/bsd-user/elfload.c +++ b/bsd-user/elfload.c @@ -352,9 +352,10 @@ static abi_ulong load_elf_interp(struct elfhdr *interp_elf_ex, static int symfind(const void *s0, const void *s1) { - target_ulong addr = *(target_ulong *)s0; struct elf_sym *sym = (struct elf_sym *)s1; + __typeof(sym->st_value) addr = *(uint64_t *)s0; int result = 0; + if (addr < sym->st_value) { result = -1; } else if (addr >= sym->st_value + sym->st_size) { @@ -363,7 +364,7 @@ static int symfind(const void *s0, const void *s1) return result; } -static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr) +static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr) { #if ELF_CLASS == ELFCLASS32 struct elf_sym *syms = s->disas_symtab.elf32; diff --git a/disas/disas.c b/disas/disas.c index d46f638a72..aac7cf3b03 100644 --- a/disas/disas.c +++ b/disas/disas.c @@ -204,10 +204,9 @@ static void initialize_debug_host(CPUDebug *s) } /* Disassemble this for me please... (debugging). */ -void target_disas(FILE *out, CPUState *cpu, target_ulong code, - target_ulong size) +void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size) { - target_ulong pc; + uint64_t pc; int count; CPUDebug s; @@ -226,7 +225,7 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code, } for (pc = code; size > 0; pc += count, size -= count) { - fprintf(out, "0x" TARGET_FMT_lx ": ", pc); + fprintf(out, "0x%08" PRIx64 ": ", pc); count = s.info.print_insn(pc, &s.info); fprintf(out, "\n"); if (count < 0) { @@ -293,7 +292,7 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size) } /* Disassemble this for me please... (debugging). */ -void disas(FILE *out, const void *code, unsigned long size) +void disas(FILE *out, const void *code, size_t size) { uintptr_t pc; int count; @@ -325,7 +324,7 @@ void disas(FILE *out, const void *code, unsigned long size) } /* Look up symbol for debugging purpose. Returns "" if unknown. */ -const char *lookup_symbol(target_ulong orig_addr) +const char *lookup_symbol(uint64_t orig_addr) { const char *symbol = ""; struct syminfo *s; @@ -357,8 +356,8 @@ physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length, } /* Disassembler for the monitor. */ -void monitor_disas(Monitor *mon, CPUState *cpu, - target_ulong pc, int nb_insn, int is_physical) +void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc, + int nb_insn, bool is_physical) { int count, i; CPUDebug s; @@ -379,13 +378,13 @@ void monitor_disas(Monitor *mon, CPUState *cpu, } if (!s.info.print_insn) { - monitor_printf(mon, "0x" TARGET_FMT_lx + monitor_printf(mon, "0x%08" PRIx64 ": Asm output not supported on this arch\n", pc); return; } for (i = 0; i < nb_insn; i++) { - g_string_append_printf(ds, "0x" TARGET_FMT_lx ": ", pc); + g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc); count = s.info.print_insn(pc, &s.info); g_string_append_c(ds, '\n'); if (count < 0) { diff --git a/include/disas/disas.h b/include/disas/disas.h index d363e95ede..6c394e0b09 100644 --- a/include/disas/disas.h +++ b/include/disas/disas.h @@ -7,28 +7,23 @@ #include "cpu.h" /* Disassemble this for me please... (debugging). 
*/ -void disas(FILE *out, const void *code, unsigned long size); -void target_disas(FILE *out, CPUState *cpu, target_ulong code, - target_ulong size); +void disas(FILE *out, const void *code, size_t size); +void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size); -void monitor_disas(Monitor *mon, CPUState *cpu, - target_ulong pc, int nb_insn, int is_physical); +void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc, + int nb_insn, bool is_physical); char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size); /* Look up symbol for debugging purpose. Returns "" if unknown. */ -const char *lookup_symbol(target_ulong orig_addr); +const char *lookup_symbol(uint64_t orig_addr); #endif struct syminfo; struct elf32_sym; struct elf64_sym; -#if defined(CONFIG_USER_ONLY) -typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr); -#else -typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr); -#endif +typedef const char *(*lookup_symbol_t)(struct syminfo *s, uint64_t orig_addr); struct syminfo { lookup_symbol_t lookup_symbol; diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 703f7434a0..80085b8a30 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -3327,9 +3327,10 @@ static void load_elf_interp(const char *filename, struct image_info *info, static int symfind(const void *s0, const void *s1) { - target_ulong addr = *(target_ulong *)s0; struct elf_sym *sym = (struct elf_sym *)s1; + __typeof(sym->st_value) addr = *(uint64_t *)s0; int result = 0; + if (addr < sym->st_value) { result = -1; } else if (addr >= sym->st_value + sym->st_size) { @@ -3338,7 +3339,7 @@ static int symfind(const void *s0, const void *s1) return result; } -static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr) +static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr) { #if ELF_CLASS == ELFCLASS32 struct elf_sym *syms = s->disas_symtab.elf32; From 45dfbd4320cc29a06b4921fe21a145e9a4f87323 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 3 May 2023 08:23:29 +0100 Subject: [PATCH 06/53] disas: Remove target-specific headers Reviewed-by: Thomas Huth Signed-off-by: Richard Henderson Message-Id: <20230503072331.1747057-83-richard.henderson@linaro.org> --- disas/disas.c | 3 ++- include/disas/disas.h | 6 ------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/disas/disas.c b/disas/disas.c index aac7cf3b03..a06954254b 100644 --- a/disas/disas.c +++ b/disas/disas.c @@ -3,9 +3,10 @@ #include "disas/dis-asm.h" #include "elf.h" #include "qemu/qemu-print.h" - #include "disas/disas.h" #include "disas/capstone.h" +#include "hw/core/cpu.h" +#include "exec/memory.h" typedef struct CPUDebug { struct disassemble_info info; diff --git a/include/disas/disas.h b/include/disas/disas.h index 6c394e0b09..176775eff7 100644 --- a/include/disas/disas.h +++ b/include/disas/disas.h @@ -1,11 +1,6 @@ #ifndef QEMU_DISAS_H #define QEMU_DISAS_H -#include "exec/hwaddr.h" - -#ifdef NEED_CPU_H -#include "cpu.h" - /* Disassemble this for me please... (debugging). */ void disas(FILE *out, const void *code, size_t size); void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size); @@ -17,7 +12,6 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size); /* Look up symbol for debugging purpose. Returns "" if unknown. 
*/ const char *lookup_symbol(uint64_t orig_addr); -#endif struct syminfo; struct elf32_sym; From e22d3c48db6fea230a24881a6a29766f2156283a Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 9 May 2023 17:14:36 +0100 Subject: [PATCH 07/53] disas: Move softmmu specific code to separate file We'd like to move disas.c into the common code source set, where CONFIG_USER_ONLY is not available anymore. So we have to move the related code into a separate file instead. Signed-off-by: Thomas Huth Message-Id: <20230508133745.109463-2-thuth@redhat.com> [rth: Type change done in a separate patch] Signed-off-by: Richard Henderson --- disas/disas-internal.h | 21 ++++++++++++ disas/disas-mon.c | 65 ++++++++++++++++++++++++++++++++++++ disas/disas.c | 76 ++++-------------------------------------- disas/meson.build | 1 + 4 files changed, 93 insertions(+), 70 deletions(-) create mode 100644 disas/disas-internal.h create mode 100644 disas/disas-mon.c diff --git a/disas/disas-internal.h b/disas/disas-internal.h new file mode 100644 index 0000000000..84a01f126f --- /dev/null +++ b/disas/disas-internal.h @@ -0,0 +1,21 @@ +/* + * Definitions used internally in the disassembly code + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef DISAS_INTERNAL_H +#define DISAS_INTERNAL_H + +#include "disas/dis-asm.h" + +typedef struct CPUDebug { + struct disassemble_info info; + CPUState *cpu; +} CPUDebug; + +void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu); +int disas_gstring_printf(FILE *stream, const char *fmt, ...) + G_GNUC_PRINTF(2, 3); + +#endif diff --git a/disas/disas-mon.c b/disas/disas-mon.c new file mode 100644 index 0000000000..48ac492c6c --- /dev/null +++ b/disas/disas-mon.c @@ -0,0 +1,65 @@ +/* + * Functions related to disassembly from the monitor + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "disas-internal.h" +#include "disas/disas.h" +#include "exec/memory.h" +#include "hw/core/cpu.h" +#include "monitor/monitor.h" + +static int +physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length, + struct disassemble_info *info) +{ + CPUDebug *s = container_of(info, CPUDebug, info); + MemTxResult res; + + res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED, + myaddr, length); + return res == MEMTX_OK ? 0 : EIO; +} + +/* Disassembler for the monitor. */ +void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc, + int nb_insn, bool is_physical) +{ + int count, i; + CPUDebug s; + g_autoptr(GString) ds = g_string_new(""); + + disas_initialize_debug_target(&s, cpu); + s.info.fprintf_func = disas_gstring_printf; + s.info.stream = (FILE *)ds; /* abuse this slot */ + + if (is_physical) { + s.info.read_memory_func = physical_read_memory; + } + s.info.buffer_vma = pc; + + if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) { + monitor_puts(mon, ds->str); + return; + } + + if (!s.info.print_insn) { + monitor_printf(mon, "0x%08" PRIx64 + ": Asm output not supported on this arch\n", pc); + return; + } + + for (i = 0; i < nb_insn; i++) { + g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc); + count = s.info.print_insn(pc, &s.info); + g_string_append_c(ds, '\n'); + if (count < 0) { + break; + } + pc += count; + } + + monitor_puts(mon, ds->str); +} diff --git a/disas/disas.c b/disas/disas.c index a06954254b..45614af02d 100644 --- a/disas/disas.c +++ b/disas/disas.c @@ -1,6 +1,6 @@ /* General "disassemble this chunk" code. Used for debugging. 
*/ #include "qemu/osdep.h" -#include "disas/dis-asm.h" +#include "disas/disas-internal.h" #include "elf.h" #include "qemu/qemu-print.h" #include "disas/disas.h" @@ -8,11 +8,6 @@ #include "hw/core/cpu.h" #include "exec/memory.h" -typedef struct CPUDebug { - struct disassemble_info info; - CPUState *cpu; -} CPUDebug; - /* Filled in by elfload.c. Simplistic, but will do for now. */ struct syminfo *syminfos = NULL; @@ -120,7 +115,7 @@ static void initialize_debug(CPUDebug *s) s->info.symbol_at_address_func = symbol_at_address; } -static void initialize_debug_target(CPUDebug *s, CPUState *cpu) +void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu) { initialize_debug(s); @@ -211,7 +206,7 @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size) int count; CPUDebug s; - initialize_debug_target(&s, cpu); + disas_initialize_debug_target(&s, cpu); s.info.fprintf_func = fprintf; s.info.stream = out; s.info.buffer_vma = code; @@ -242,8 +237,7 @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size) } } -static int G_GNUC_PRINTF(2, 3) -gstring_printf(FILE *stream, const char *fmt, ...) +int disas_gstring_printf(FILE *stream, const char *fmt, ...) { /* We abuse the FILE parameter to pass a GString. */ GString *s = (GString *)stream; @@ -273,8 +267,8 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size) CPUDebug s; GString *ds = g_string_new(NULL); - initialize_debug_target(&s, cpu); - s.info.fprintf_func = gstring_printf; + disas_initialize_debug_target(&s, cpu); + s.info.fprintf_func = disas_gstring_printf; s.info.stream = (FILE *)ds; /* abuse this slot */ s.info.buffer_vma = addr; s.info.buffer_length = size; @@ -339,61 +333,3 @@ const char *lookup_symbol(uint64_t orig_addr) return symbol; } - -#if !defined(CONFIG_USER_ONLY) - -#include "monitor/monitor.h" - -static int -physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length, - struct disassemble_info *info) -{ - CPUDebug *s = container_of(info, CPUDebug, info); - MemTxResult res; - - res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED, - myaddr, length); - return res == MEMTX_OK ? 0 : EIO; -} - -/* Disassembler for the monitor. 
*/ -void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc, - int nb_insn, bool is_physical) -{ - int count, i; - CPUDebug s; - g_autoptr(GString) ds = g_string_new(""); - - initialize_debug_target(&s, cpu); - s.info.fprintf_func = gstring_printf; - s.info.stream = (FILE *)ds; /* abuse this slot */ - - if (is_physical) { - s.info.read_memory_func = physical_read_memory; - } - s.info.buffer_vma = pc; - - if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) { - monitor_puts(mon, ds->str); - return; - } - - if (!s.info.print_insn) { - monitor_printf(mon, "0x%08" PRIx64 - ": Asm output not supported on this arch\n", pc); - return; - } - - for (i = 0; i < nb_insn; i++) { - g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc); - count = s.info.print_insn(pc, &s.info); - g_string_append_c(ds, '\n'); - if (count < 0) { - break; - } - pc += count; - } - - monitor_puts(mon, ds->str); -} -#endif diff --git a/disas/meson.build b/disas/meson.build index cbf6315f25..f40230c58f 100644 --- a/disas/meson.build +++ b/disas/meson.build @@ -12,4 +12,5 @@ common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c')) common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c')) common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone]) +softmmu_ss.add(files('disas-mon.c')) specific_ss.add(files('disas.c'), capstone) From eb0153efa6fa58b2c9d891b17766dbedc10e31b5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 9 May 2023 17:27:48 +0100 Subject: [PATCH 08/53] disas: Move disas.c into the target-independent source set Use target_words_bigendian() instead of an ifdef. Remove CONFIG_RISCV_DIS from the check for riscv as a host; this is a poisoned identifier, and anyway will always be set by meson.build when building on a riscv host. 
Signed-off-by: Thomas Huth Message-Id: <20230508133745.109463-3-thuth@redhat.com> [rth: Type change done in a separate patch] Signed-off-by: Richard Henderson --- disas/disas.c | 12 ++++++------ disas/meson.build | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/disas/disas.c b/disas/disas.c index 45614af02d..0d2d06c2ec 100644 --- a/disas/disas.c +++ b/disas/disas.c @@ -122,11 +122,11 @@ void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu) s->cpu = cpu; s->info.read_memory_func = target_read_memory; s->info.print_address_func = print_address; -#if TARGET_BIG_ENDIAN - s->info.endian = BFD_ENDIAN_BIG; -#else - s->info.endian = BFD_ENDIAN_LITTLE; -#endif + if (target_words_bigendian()) { + s->info.endian = BFD_ENDIAN_BIG; + } else { + s->info.endian = BFD_ENDIAN_LITTLE; + } CPUClass *cc = CPU_GET_CLASS(cpu); if (cc->disas_set_info) { @@ -164,7 +164,7 @@ static void initialize_debug_host(CPUDebug *s) # ifdef _ARCH_PPC64 s->info.cap_mode = CS_MODE_64; # endif -#elif defined(__riscv) && defined(CONFIG_RISCV_DIS) +#elif defined(__riscv) #if defined(_ILP32) || (__riscv_xlen == 32) s->info.print_insn = print_insn_riscv32; #elif defined(_LP64) diff --git a/disas/meson.build b/disas/meson.build index f40230c58f..832727e4b3 100644 --- a/disas/meson.build +++ b/disas/meson.build @@ -11,6 +11,7 @@ common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c')) common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c')) common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c')) common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone]) +common_ss.add(files('disas.c')) softmmu_ss.add(files('disas-mon.c')) -specific_ss.add(files('disas.c'), capstone) +specific_ss.add(capstone) From 370ed600296982a0248b091915c8e8893508d8a3 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Thu, 27 Apr 2023 03:09:24 +0100 Subject: [PATCH 09/53] cpu: expose qemu_cpu_list_lock for lock-guard use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose qemu_cpu_list_lock globally so that we can use WITH_QEMU_LOCK_GUARD and QEMU_LOCK_GUARD to simplify a few code paths now and in future. 
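As a minimal illustration of the pattern this enables (a hypothetical helper, not part of this patch; the macros come from include/qemu/lockable.h and the lock from include/exec/cpu-common.h):

    #include "qemu/osdep.h"
    #include "qemu/lockable.h"
    #include "exec/cpu-common.h"
    #include "hw/core/cpu.h"

    /*
     * Sketch only: the guard takes qemu_cpu_list_lock for the duration
     * of the block and releases it on every path out of the block, so
     * no explicit cpu_list_unlock() call is needed.
     */
    static int count_cpus_sketch(void)
    {
        CPUState *cpu;
        int n = 0;

        WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
            CPU_FOREACH(cpu) {
                n++;
            }
        }
        return n;
    }

The elfload.c and dirtyrate.c hunks below take this shape.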
Signed-off-by: Jamie Iles Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20230427020925.51003-2-quic_jiles@quicinc.com> Signed-off-by: Richard Henderson --- cpus-common.c | 2 +- include/exec/cpu-common.h | 1 + linux-user/elfload.c | 13 +++++++------ migration/dirtyrate.c | 26 +++++++++++++------------- trace/control-target.c | 9 ++++----- 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/cpus-common.c b/cpus-common.c index a53716deb4..45c745ecf6 100644 --- a/cpus-common.c +++ b/cpus-common.c @@ -25,7 +25,7 @@ #include "qemu/lockable.h" #include "trace/trace-root.h" -static QemuMutex qemu_cpu_list_lock; +QemuMutex qemu_cpu_list_lock; static QemuCond exclusive_cond; static QemuCond exclusive_resume; static QemuCond qemu_work_cond; diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 1be4a3117e..e5a55ede5f 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -32,6 +32,7 @@ extern intptr_t qemu_host_page_mask; #define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size()) /* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */ +extern QemuMutex qemu_cpu_list_lock; void qemu_init_cpu_list(void); void cpu_list_lock(void); void cpu_list_unlock(void); diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 80085b8a30..418ad92598 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -17,6 +17,7 @@ #include "qemu/guest-random.h" #include "qemu/units.h" #include "qemu/selfmap.h" +#include "qemu/lockable.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "target_signal.h" @@ -4238,14 +4239,14 @@ static int fill_note_info(struct elf_note_info *info, info->notes_size += note_size(&info->notes[i]); /* read and fill status of all threads */ - cpu_list_lock(); - CPU_FOREACH(cpu) { - if (cpu == thread_cpu) { - continue; + WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) { + CPU_FOREACH(cpu) { + if (cpu == thread_cpu) { + continue; + } + fill_thread_info(info, cpu->env_ptr); } - fill_thread_info(info, cpu->env_ptr); } - cpu_list_unlock(); return (0); } diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 180ba38c7a..388337a332 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -150,25 +150,25 @@ int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms, retry: init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - cpu_list_lock(); - gen_id = cpu_list_generation_id_get(); - records = vcpu_dirty_stat_alloc(stat); - vcpu_dirty_stat_collect(stat, records, true); - cpu_list_unlock(); + WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) { + gen_id = cpu_list_generation_id_get(); + records = vcpu_dirty_stat_alloc(stat); + vcpu_dirty_stat_collect(stat, records, true); + } duration = dirty_stat_wait(calc_time_ms, init_time_ms); global_dirty_log_sync(flag, one_shot); - cpu_list_lock(); - if (gen_id != cpu_list_generation_id_get()) { - g_free(records); - g_free(stat->rates); - cpu_list_unlock(); - goto retry; + WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) { + if (gen_id != cpu_list_generation_id_get()) { + g_free(records); + g_free(stat->rates); + cpu_list_unlock(); + goto retry; + } + vcpu_dirty_stat_collect(stat, records, false); } - vcpu_dirty_stat_collect(stat, records, false); - cpu_list_unlock(); for (i = 0; i < stat->nvcpu; i++) { dirtyrate = do_calculate_dirtyrate(records[i], duration); diff --git a/trace/control-target.c b/trace/control-target.c index 232c97a4a1..c0c1e2310a 100644 --- a/trace/control-target.c +++ b/trace/control-target.c @@ -8,6 +8,7 @@ */ 
#include "qemu/osdep.h" +#include "qemu/lockable.h" #include "cpu.h" #include "trace/trace-root.h" #include "trace/control.h" @@ -116,11 +117,9 @@ static bool adding_first_cpu1(void) static bool adding_first_cpu(void) { - bool res; - cpu_list_lock(); - res = adding_first_cpu1(); - cpu_list_unlock(); - return res; + QEMU_LOCK_GUARD(&qemu_cpu_list_lock); + + return adding_first_cpu1(); } void trace_init_vcpu(CPUState *vcpu) From 83ecdb18eb65ed57239b87f3898ae92a590d0077 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Thu, 27 Apr 2023 03:09:25 +0100 Subject: [PATCH 10/53] accel/tcg/tcg-accel-ops-rr: ensure fairness with icount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The round-robin scheduler will iterate over the CPU list with an assigned budget until the next timer expiry and may exit early because of a TB exit. This is fine under normal operation but with icount enabled and SMP it is possible for a CPU to be starved of run time and the system live-locks. For example, booting a riscv64 platform with '-icount shift=0,align=off,sleep=on -smp 2' we observe a livelock once the kernel has timers enabled and starts performing TLB shootdowns. In this case we have CPU 0 in M-mode with interrupts disabled sending an IPI to CPU 1. As we enter the TCG loop, we assign the icount budget to next timer interrupt to CPU 0 and begin executing where the guest is sat in a busy loop exhausting all of the budget before we try to execute CPU 1 which is the target of the IPI but CPU 1 is left with no budget with which to execute and the process repeats. We try here to add some fairness by splitting the budget across all of the CPUs on the thread fairly before entering each one. The CPU count is cached on CPU list generation ID to avoid iterating the list on each loop iteration. With this change it is possible to boot an SMP rv64 guest with icount enabled and no hangs. 
Reviewed-by: Philippe Mathieu-Daudé Tested-by: Peter Maydell Signed-off-by: Jamie Iles Reviewed-by: Richard Henderson Message-Id: <20230427020925.51003-3-quic_jiles@quicinc.com> Signed-off-by: Richard Henderson --- accel/tcg/tcg-accel-ops-icount.c | 21 ++++++++++++++---- accel/tcg/tcg-accel-ops-icount.h | 3 ++- accel/tcg/tcg-accel-ops-rr.c | 37 +++++++++++++++++++++++++++++++- replay/replay.c | 3 +-- 4 files changed, 56 insertions(+), 8 deletions(-) diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c index 84cc7421be..3d2cfbbc97 100644 --- a/accel/tcg/tcg-accel-ops-icount.c +++ b/accel/tcg/tcg-accel-ops-icount.c @@ -89,7 +89,20 @@ void icount_handle_deadline(void) } } -void icount_prepare_for_run(CPUState *cpu) +/* Distribute the budget evenly across all CPUs */ +int64_t icount_percpu_budget(int cpu_count) +{ + int64_t limit = icount_get_limit(); + int64_t timeslice = limit / cpu_count; + + if (timeslice == 0) { + timeslice = limit; + } + + return timeslice; +} + +void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget) { int insns_left; @@ -101,13 +114,13 @@ void icount_prepare_for_run(CPUState *cpu) g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0); g_assert(cpu->icount_extra == 0); - cpu->icount_budget = icount_get_limit(); + replay_mutex_lock(); + + cpu->icount_budget = MIN(icount_get_limit(), cpu_budget); insns_left = MIN(0xffff, cpu->icount_budget); cpu_neg(cpu)->icount_decr.u16.low = insns_left; cpu->icount_extra = cpu->icount_budget - insns_left; - replay_mutex_lock(); - if (cpu->icount_budget == 0) { /* * We're called without the iothread lock, so must take it while diff --git a/accel/tcg/tcg-accel-ops-icount.h b/accel/tcg/tcg-accel-ops-icount.h index 1b6fd9c607..16a301b6dc 100644 --- a/accel/tcg/tcg-accel-ops-icount.h +++ b/accel/tcg/tcg-accel-ops-icount.h @@ -11,7 +11,8 @@ #define TCG_ACCEL_OPS_ICOUNT_H void icount_handle_deadline(void); -void icount_prepare_for_run(CPUState *cpu); +void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget); +int64_t icount_percpu_budget(int cpu_count); void icount_process_data(CPUState *cpu); void icount_handle_interrupt(CPUState *cpu, int mask); diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 290833a37f..5788efa5ff 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -24,6 +24,7 @@ */ #include "qemu/osdep.h" +#include "qemu/lockable.h" #include "sysemu/tcg.h" #include "sysemu/replay.h" #include "sysemu/cpu-timers.h" @@ -139,6 +140,33 @@ static void rr_force_rcu(Notifier *notify, void *data) rr_kick_next_cpu(); } +/* + * Calculate the number of CPUs that we will process in a single iteration of + * the main CPU thread loop so that we can fairly distribute the instruction + * count across CPUs. + * + * The CPU count is cached based on the CPU list generation ID to avoid + * iterating the list every time. + */ +static int rr_cpu_count(void) +{ + static unsigned int last_gen_id = ~0; + static int cpu_count; + CPUState *cpu; + + QEMU_LOCK_GUARD(&qemu_cpu_list_lock); + + if (cpu_list_generation_id_get() != last_gen_id) { + cpu_count = 0; + CPU_FOREACH(cpu) { + ++cpu_count; + } + last_gen_id = cpu_list_generation_id_get(); + } + + return cpu_count; +} + /* * In the single-threaded case each vCPU is simulated in turn. 
If * there is more than a single vCPU we create a simple timer to kick @@ -185,11 +213,16 @@ static void *rr_cpu_thread_fn(void *arg) cpu->exit_request = 1; while (1) { + /* Only used for icount_enabled() */ + int64_t cpu_budget = 0; + qemu_mutex_unlock_iothread(); replay_mutex_lock(); qemu_mutex_lock_iothread(); if (icount_enabled()) { + int cpu_count = rr_cpu_count(); + /* Account partial waits to QEMU_CLOCK_VIRTUAL. */ icount_account_warp_timer(); /* @@ -197,6 +230,8 @@ static void *rr_cpu_thread_fn(void *arg) * waking up the I/O thread and waiting for completion. */ icount_handle_deadline(); + + cpu_budget = icount_percpu_budget(cpu_count); } replay_mutex_unlock(); @@ -218,7 +253,7 @@ static void *rr_cpu_thread_fn(void *arg) qemu_mutex_unlock_iothread(); if (icount_enabled()) { - icount_prepare_for_run(cpu); + icount_prepare_for_run(cpu, cpu_budget); } r = tcg_cpus_exec(cpu); if (icount_enabled()) { diff --git a/replay/replay.c b/replay/replay.c index c39156c522..0f7d766efe 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -74,7 +74,7 @@ uint64_t replay_get_current_icount(void) int replay_get_instructions(void) { int res = 0; - replay_mutex_lock(); + g_assert(replay_mutex_locked()); if (replay_next_event_is(EVENT_INSTRUCTION)) { res = replay_state.instruction_count; if (replay_break_icount != -1LL) { @@ -85,7 +85,6 @@ int replay_get_instructions(void) } } } - replay_mutex_unlock(); return res; } From 530074c6c1134a8ca488f50af82788a634a1e552 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 19 Apr 2023 19:10:27 +0200 Subject: [PATCH 11/53] tcg/i386: Introduce prepare_host_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment, and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns HostAddress and TCGLabelQemuLdst structures. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 346 ++++++++++++++++---------------------- 1 file changed, 145 insertions(+), 201 deletions(-) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index aae698121a..5d702b69ac 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1802,135 +1802,6 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = { [MO_BEUQ] = helper_be_stq_mmu, }; -/* Perform the TLB load and compare. - - Inputs: - ADDRLO and ADDRHI contain the low and high part of the address. - - MEM_INDEX and S_BITS are the memory context and log2 size of the load. - - WHICH is the offset into the CPUTLBEntry structure of the slot to read. - This should be offsetof addr_read or addr_write. - - Outputs: - LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) - positions of the displacements of forward jumps to the TLB miss case. - - Second argument register is loaded with the low part of the address. - In the TLB hit case, it has been adjusted as indicated by the TLB - and so is a host address. In the TLB miss case, it continues to - hold a guest address. - - First argument register is clobbered. 
*/ - -static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - int mem_index, MemOp opc, - tcg_insn_unit **label_ptr, int which) -{ - TCGType ttype = TCG_TYPE_I32; - TCGType tlbtype = TCG_TYPE_I32; - int trexw = 0, hrexw = 0, tlbrexw = 0; - unsigned a_bits = get_alignment_bits(opc); - unsigned s_bits = opc & MO_SIZE; - unsigned a_mask = (1 << a_bits) - 1; - unsigned s_mask = (1 << s_bits) - 1; - target_ulong tlb_mask; - - if (TCG_TARGET_REG_BITS == 64) { - if (TARGET_LONG_BITS == 64) { - ttype = TCG_TYPE_I64; - trexw = P_REXW; - } - if (TCG_TYPE_PTR == TCG_TYPE_I64) { - hrexw = P_REXW; - if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) { - tlbtype = TCG_TYPE_I64; - tlbrexw = P_REXW; - } - } - } - - tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo); - tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, - TLB_MASK_TABLE_OFS(mem_index) + - offsetof(CPUTLBDescFast, mask)); - - tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0, - TLB_MASK_TABLE_OFS(mem_index) + - offsetof(CPUTLBDescFast, table)); - - /* If the required alignment is at least as large as the access, simply - copy the address and mask. For lesser alignments, check that we don't - cross pages for the complete access. */ - if (a_bits >= s_bits) { - tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); - } else { - tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, - addrlo, s_mask - a_mask); - } - tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; - tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); - - /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, - TCG_REG_L1, TCG_REG_L0, which); - - /* Prepare for both the fast path add of the tlb addend, and the slow - path function argument setup. */ - tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); - - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - label_ptr[0] = s->code_ptr; - s->code_ptr += 4; - - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - /* cmp 4(TCG_REG_L0), addrhi */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, which + 4); - - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - label_ptr[1] = s->code_ptr; - s->code_ptr += 4; - } - - /* TLB Hit. 
*/ - - /* add addend(TCG_REG_L0), TCG_REG_L1 */ - tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L1, TCG_REG_L0, - offsetof(CPUTLBEntry, addend)); -} - -/* - * Record the context of a call to the out of line helper code for the slow path - * for a load or store, so that we can later generate the correct helper code - */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, - TCGType type, MemOpIdx oi, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addrhi, - tcg_insn_unit *raddr, - tcg_insn_unit **label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = type; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr[0]; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - label->label_ptr[1] = label_ptr[1]; - } -} - /* * Generate code for the slow path for a load at the end of block */ @@ -2061,27 +1932,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return true; } #else - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, - TCGReg addrhi, unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *label; - - tcg_out_testi(s, addrlo, a_mask); - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - - label = new_ldst_label(s); - label->is_ld = is_ld; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = tcg_splitwx_to_rx(s->code_ptr + 4); - label->label_ptr[0] = s->code_ptr; - - s->code_ptr += 4; -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { /* resolve label address */ @@ -2159,6 +2009,135 @@ static inline int setup_guest_base_seg(void) #endif /* setup_guest_base_seg */ #endif /* SOFTMMU */ +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addrlo, TCGReg addrhi, + MemOpIdx oi, bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + unsigned a_mask = (1 << a_bits) - 1; + +#ifdef CONFIG_SOFTMMU + int cmp_ofs = is_ld ? 
offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write); + TCGType ttype = TCG_TYPE_I32; + TCGType tlbtype = TCG_TYPE_I32; + int trexw = 0, hrexw = 0, tlbrexw = 0; + unsigned mem_index = get_mmuidx(oi); + unsigned s_bits = opc & MO_SIZE; + unsigned s_mask = (1 << s_bits) - 1; + target_ulong tlb_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + if (TCG_TARGET_REG_BITS == 64) { + if (TARGET_LONG_BITS == 64) { + ttype = TCG_TYPE_I64; + trexw = P_REXW; + } + if (TCG_TYPE_PTR == TCG_TYPE_I64) { + hrexw = P_REXW; + if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) { + tlbtype = TCG_TYPE_I64; + tlbrexw = P_REXW; + } + } + } + + tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo); + tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, + TLB_MASK_TABLE_OFS(mem_index) + + offsetof(CPUTLBDescFast, mask)); + + tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0, + TLB_MASK_TABLE_OFS(mem_index) + + offsetof(CPUTLBDescFast, table)); + + /* + * If the required alignment is at least as large as the access, simply + * copy the address and mask. For lesser alignments, check that we don't + * cross pages for the complete access. + */ + if (a_bits >= s_bits) { + tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); + } else { + tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, + addrlo, s_mask - a_mask); + } + tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; + tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); + + /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, + TCG_REG_L1, TCG_REG_L0, cmp_ofs); + + /* + * Prepare for both the fast path add of the tlb addend, and the slow + * path function argument setup. + */ + *h = (HostAddress) { + .base = TCG_REG_L1, + .index = -1 + }; + tcg_out_mov(s, ttype, h->base, addrlo); + + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + ldst->label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + /* cmp 4(TCG_REG_L0), addrhi */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4); + + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + ldst->label_ptr[1] = s->code_ptr; + s->code_ptr += 4; + } + + /* TLB Hit. 
*/ + + /* add addend(TCG_REG_L0), TCG_REG_L1 */ + tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, h->base, TCG_REG_L0, + offsetof(CPUTLBEntry, addend)); +#else + if (a_bits) { + ldst = new_ldst_label(s); + + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + tcg_out_testi(s, addrlo, a_mask); + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + ldst->label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + } + + *h = x86_guest_base; + h->base = addrlo; +#endif + + return ldst; +} + static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, HostAddress h, TCGType type, MemOp memop) { @@ -2258,35 +2237,18 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); + tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi)); - tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc, - label_ptr, offsetof(CPUTLBEntry, addr_read)); - - /* TLB Hit. */ - h.base = TCG_REG_L1; - h.index = -1; - h.ofs = 0; - h.seg = 0; - tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc); - - /* Record the current context of a load into ldst label */ - add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - - h = x86_guest_base; - h.base = addrlo; - tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc); -#endif } static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, @@ -2345,36 +2307,18 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); + tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi)); - tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc, - label_ptr, offsetof(CPUTLBEntry, addr_write)); - - /* TLB Hit. 
*/ - h.base = TCG_REG_L1; - h.index = -1; - h.ofs = 0; - h.seg = 0; - tcg_out_qemu_st_direct(s, datalo, datahi, h, opc); - - /* Record the current context of a store into ldst label */ - add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - - h = x86_guest_base; - h.base = addrlo; - - tcg_out_qemu_st_direct(s, datalo, datahi, h, opc); -#endif } static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) From 1fac4648fed615f0507368633bb7d0a9821873d6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 16 Apr 2023 09:00:18 +0200 Subject: [PATCH 12/53] tcg/i386: Use indexed addressing for softmmu fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since tcg_out_{ld,st}_helper_args, the slow path no longer requires the address argument to be set up by the tlb load sequence. Use a plain load for the addend and indexed addressing with the original input address register. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 5d702b69ac..18b0e7997d 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1837,7 +1837,8 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs); } else { tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - /* The second argument is already loaded with addrlo. */ + tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], + l->addrlo_reg); tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi); tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3], (uintptr_t)l->raddr); @@ -1910,7 +1911,8 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs); } else { tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - /* The second argument is already loaded with addrlo. */ + tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], + l->addrlo_reg); tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), tcg_target_call_iarg_regs[2], l->datalo_reg); tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi); @@ -2085,16 +2087,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, TCG_REG_L1, TCG_REG_L0, cmp_ofs); - /* - * Prepare for both the fast path add of the tlb addend, and the slow - * path function argument setup. - */ - *h = (HostAddress) { - .base = TCG_REG_L1, - .index = -1 - }; - tcg_out_mov(s, ttype, h->base, addrlo); - /* jne slow_path */ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); ldst->label_ptr[0] = s->code_ptr; @@ -2111,10 +2103,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, } /* TLB Hit. 
*/ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, + offsetof(CPUTLBEntry, addend)); - /* add addend(TCG_REG_L0), TCG_REG_L1 */ - tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, h->base, TCG_REG_L0, - offsetof(CPUTLBEntry, addend)); + *h = (HostAddress) { + .base = addrlo, + .index = TCG_REG_L0, + }; #else if (a_bits) { ldst = new_ldst_label(s); From 1e612dd66a3915d9560a13341b3e32a2bafbff83 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 21 Apr 2023 12:37:20 +0100 Subject: [PATCH 13/53] tcg/aarch64: Introduce prepare_host_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment, and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns HostAddress and TCGLabelQemuLdst structures. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 313 +++++++++++++++-------------------- 1 file changed, 133 insertions(+), 180 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index d8d464e4a0..202b90c001 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1667,113 +1667,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_goto(s, lb->raddr); return true; } - -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, - TCGType ext, TCGReg data_reg, TCGReg addr_reg, - tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = ext; - label->datalo_reg = data_reg; - label->addrlo_reg = addr_reg; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr; -} - -/* We expect to use a 7-bit scaled negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); - -/* These offsets are built into the LDP below. */ -QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); -QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); - -/* Load and compare a TLB entry, emitting the conditional jump to the - slow path for the failure case, which will be patched later when finalizing - the slow path. Generated code returns the host addend in X1, - clobbers X0,X2,X3,TMP. */ -static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, - tcg_insn_unit **label_ptr, int mem_index, - bool is_read) -{ - unsigned a_bits = get_alignment_bits(opc); - unsigned s_bits = opc & MO_SIZE; - unsigned a_mask = (1u << a_bits) - 1; - unsigned s_mask = (1u << s_bits) - 1; - TCGReg x3; - TCGType mask_type; - uint64_t compare_mask; - - mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 - ? TCG_TYPE_I64 : TCG_TYPE_I32); - - /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ - tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, - TLB_MASK_TABLE_OFS(mem_index), 1, 0); - - /* Extract the TLB index from the address into X0. */ - tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, - TCG_REG_X0, TCG_REG_X0, addr_reg, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ - tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); - - /* Load the tlb comparator into X0, and the fast path addend into X1. */ - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read - ? 
offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, - offsetof(CPUTLBEntry, addend)); - - /* For aligned accesses, we check the first byte and include the alignment - bits within the address. For unaligned access, we check that we don't - cross pages using the address of the last byte of the access. */ - if (a_bits >= s_bits) { - x3 = addr_reg; - } else { - tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, - TCG_REG_X3, addr_reg, s_mask - a_mask); - x3 = TCG_REG_X3; - } - compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; - - /* Store the page mask part of the address into X3. */ - tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, - TCG_REG_X3, x3, compare_mask); - - /* Perform the address comparison. */ - tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); - - /* If not equal, we jump to the slow path. */ - *label_ptr = s->code_ptr; - tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); -} - #else -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, - unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->addrlo_reg = addr_reg; - - /* tst addr, #mask */ - tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); - - label->label_ptr[0] = s->code_ptr; - - /* b.ne slow_path */ - tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); - - label->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { @@ -1801,6 +1695,125 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } #endif /* CONFIG_SOFTMMU */ +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addr_reg, MemOpIdx oi, + bool is_ld) +{ + TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + unsigned a_mask = (1u << a_bits) - 1; + +#ifdef CONFIG_SOFTMMU + unsigned s_bits = opc & MO_SIZE; + unsigned s_mask = (1u << s_bits) - 1; + unsigned mem_index = get_mmuidx(oi); + TCGReg x3; + TCGType mask_type; + uint64_t compare_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 + ? TCG_TYPE_I64 : TCG_TYPE_I32); + + /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); + QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); + QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); + tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, + TLB_MASK_TABLE_OFS(mem_index), 1, 0); + + /* Extract the TLB index from the address into X0. */ + tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, + TCG_REG_X0, TCG_REG_X0, addr_reg, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. 
*/ + tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); + + /* Load the tlb comparator into X0, and the fast path addend into X1. */ + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, + is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, + offsetof(CPUTLBEntry, addend)); + + /* + * For aligned accesses, we check the first byte and include the alignment + * bits within the address. For unaligned access, we check that we don't + * cross pages using the address of the last byte of the access. + */ + if (a_bits >= s_bits) { + x3 = addr_reg; + } else { + tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, addr_reg, s_mask - a_mask); + x3 = TCG_REG_X3; + } + compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; + + /* Store the page mask part of the address into X3. */ + tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, x3, compare_mask); + + /* Perform the address comparison. */ + tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); + + /* If not equal, we jump to the slow path. */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); + + *h = (HostAddress){ + .base = TCG_REG_X1, + .index = addr_reg, + .index_ext = addr_type + }; +#else + if (a_mask) { + ldst = new_ldst_label(s); + + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + /* tst addr, #mask */ + tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); + + /* b.ne slow_path */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); + } + + if (USE_GUEST_BASE) { + *h = (HostAddress){ + .base = TCG_REG_GUEST_BASE, + .index = addr_reg, + .index_ext = addr_type + }; + } else { + *h = (HostAddress){ + .base = addr_reg, + .index = TCG_REG_XZR, + .index_ext = TCG_TYPE_I64 + }; + } +#endif + + return ldst; +} + static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, TCGReg data_r, HostAddress h) { @@ -1857,93 +1870,33 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp memop = get_memop(oi); - TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; + TCGLabelQemuLdst *ldst; HostAddress h; - /* Byte swapping is left to middle-end expansion. 
*/ - tcg_debug_assert((memop & MO_BSWAP) == 0); + ldst = prepare_host_addr(s, &h, addr_reg, oi, true); + tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h); -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr; - - tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1); - - h = (HostAddress){ - .base = TCG_REG_X1, - .index = addr_reg, - .index_ext = addr_type - }; - tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h); - - add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - unsigned a_bits = get_alignment_bits(memop); - if (a_bits) { - tcg_out_test_alignment(s, true, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - if (USE_GUEST_BASE) { - h = (HostAddress){ - .base = TCG_REG_GUEST_BASE, - .index = addr_reg, - .index_ext = addr_type - }; - } else { - h = (HostAddress){ - .base = addr_reg, - .index = TCG_REG_XZR, - .index_ext = TCG_TYPE_I64 - }; - } - tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h); -#endif /* CONFIG_SOFTMMU */ } static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp memop = get_memop(oi); - TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; + TCGLabelQemuLdst *ldst; HostAddress h; - /* Byte swapping is left to middle-end expansion. */ - tcg_debug_assert((memop & MO_BSWAP) == 0); + ldst = prepare_host_addr(s, &h, addr_reg, oi, false); + tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h); -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr; - - tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0); - - h = (HostAddress){ - .base = TCG_REG_X1, - .index = addr_reg, - .index_ext = addr_type - }; - tcg_out_qemu_st_direct(s, memop, data_reg, h); - - add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - unsigned a_bits = get_alignment_bits(memop); - if (a_bits) { - tcg_out_test_alignment(s, false, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - if (USE_GUEST_BASE) { - h = (HostAddress){ - .base = TCG_REG_GUEST_BASE, - .index = addr_reg, - .index_ext = addr_type - }; - } else { - h = (HostAddress){ - .base = addr_reg, - .index = TCG_REG_XZR, - .index_ext = TCG_TYPE_I64 - }; - } - tcg_out_qemu_st_direct(s, memop, data_reg, h); -#endif /* CONFIG_SOFTMMU */ } static const tcg_insn_unit *tb_ret_addr; From 7131d3cf72af0ecd54d325cbe2d88db144e52fed Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Apr 2023 15:54:41 +0100 Subject: [PATCH 14/53] tcg/arm: Introduce prepare_host_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge tcg_out_tlb_load, add_qemu_ldst_label, and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns HostAddress and TCGLabelQemuLdst structures. 
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 351 ++++++++++++++++++--------------------- 1 file changed, 159 insertions(+), 192 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index b6b4ffc546..c744512778 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1434,125 +1434,6 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, } } -#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) - -/* We expect to use an 9-bit sign-magnitude negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256); - -/* These offsets are built into the LDRD below. */ -QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); -QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4); - -/* Load and compare a TLB entry, leaving the flags set. Returns the register - containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */ - -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - MemOp opc, int mem_index, bool is_load) -{ - int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - int fast_off = TLB_MASK_TABLE_OFS(mem_index); - unsigned s_mask = (1 << (opc & MO_SIZE)) - 1; - unsigned a_mask = (1 << get_alignment_bits(opc)) - 1; - TCGReg t_addr; - - /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */ - tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off); - - /* Extract the tlb index from the address into R0. */ - tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo, - SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)); - - /* - * Add the tlb_table pointer, creating the CPUTLBEntry address in R1. - * Load the tlb comparator into R2/R3 and the fast path addend into R1. - */ - if (cmp_off == 0) { - if (TARGET_LONG_BITS == 64) { - tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0); - } else { - tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0); - } - } else { - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, - TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0); - if (TARGET_LONG_BITS == 64) { - tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off); - } else { - tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off); - } - } - - /* Load the tlb addend. */ - tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1, - offsetof(CPUTLBEntry, addend)); - - /* - * Check alignment, check comparators. - * Do this in 2-4 insns. Use MOVW for v7, if possible, - * to reduce the number of sequential conditional instructions. - * Almost all guests have at least 4k pages, which means that we need - * to clear at least 9 bits even for an 8-byte memory, which means it - * isn't worth checking for an immediate operand for BIC. - * - * For unaligned accesses, test the page of the last unit of alignment. - * This leaves the least significant alignment bits unchanged, and of - * course must be zero. 
- */ - t_addr = addrlo; - if (a_mask < s_mask) { - t_addr = TCG_REG_R0; - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr, - addrlo, s_mask - a_mask); - } - if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) { - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask)); - tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP, - t_addr, TCG_REG_TMP, 0); - tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0); - } else { - if (a_mask) { - tcg_debug_assert(a_mask <= 0xff); - tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); - } - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr, - SHIFT_IMM_LSR(TARGET_PAGE_BITS)); - tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP, - 0, TCG_REG_R2, TCG_REG_TMP, - SHIFT_IMM_LSL(TARGET_PAGE_BITS)); - } - - if (TARGET_LONG_BITS == 64) { - tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0); - } - - return TCG_REG_R1; -} - -/* Record the context of a call to the out of line helper code for the slow - path for a load or store, so that we can later generate the correct - helper code. */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, - MemOpIdx oi, TCGType type, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addrhi, - tcg_insn_unit *raddr, - tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = type; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr; -} - static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg argreg; @@ -1636,29 +1517,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) return true; } #else - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, - TCGReg addrhi, unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - - /* We are expecting a_bits to max out at 7, and can easily support 8. */ - tcg_debug_assert(a_mask <= 0xff); - /* tst addr, #mask */ - tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); - - /* blne slow_path */ - label->label_ptr[0] = s->code_ptr; - tcg_out_bl_imm(s, COND_NE, 0); - - label->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { @@ -1703,6 +1561,134 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } #endif /* SOFTMMU */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addrlo, TCGReg addrhi, + MemOpIdx oi, bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + MemOp a_bits = get_alignment_bits(opc); + unsigned a_mask = (1 << a_bits) - 1; + +#ifdef CONFIG_SOFTMMU + int mem_index = get_mmuidx(oi); + int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write); + int fast_off = TLB_MASK_TABLE_OFS(mem_index); + unsigned s_mask = (1 << (opc & MO_SIZE)) - 1; + TCGReg t_addr; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. 
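+     * The QEMU_BUILD_BUG_ON checks just below keep the constraints the
+     * old definition site documented: a 9-bit sign-magnitude negative
+     * offset from ENV, with mask at offset 0 and table at offset 4,
+     * which this single ldrd depends on.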
*/ + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256); + QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); + QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4); + tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off); + + /* Extract the tlb index from the address into R0. */ + tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo, + SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)); + + /* + * Add the tlb_table pointer, creating the CPUTLBEntry address in R1. + * Load the tlb comparator into R2/R3 and the fast path addend into R1. + */ + if (cmp_off == 0) { + if (TARGET_LONG_BITS == 64) { + tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0); + } else { + tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0); + } + } else { + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, + TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0); + if (TARGET_LONG_BITS == 64) { + tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off); + } else { + tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off); + } + } + + /* Load the tlb addend. */ + tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1, + offsetof(CPUTLBEntry, addend)); + + /* + * Check alignment, check comparators. + * Do this in 2-4 insns. Use MOVW for v7, if possible, + * to reduce the number of sequential conditional instructions. + * Almost all guests have at least 4k pages, which means that we need + * to clear at least 9 bits even for an 8-byte memory, which means it + * isn't worth checking for an immediate operand for BIC. + * + * For unaligned accesses, test the page of the last unit of alignment. + * This leaves the least significant alignment bits unchanged, and of + * course must be zero. + */ + t_addr = addrlo; + if (a_mask < s_mask) { + t_addr = TCG_REG_R0; + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr, + addrlo, s_mask - a_mask); + } + if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask)); + tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP, + t_addr, TCG_REG_TMP, 0); + tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0); + } else { + if (a_mask) { + tcg_debug_assert(a_mask <= 0xff); + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); + } + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr, + SHIFT_IMM_LSR(TARGET_PAGE_BITS)); + tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP, + 0, TCG_REG_R2, TCG_REG_TMP, + SHIFT_IMM_LSL(TARGET_PAGE_BITS)); + } + + if (TARGET_LONG_BITS == 64) { + tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0); + } + + *h = (HostAddress){ + .cond = COND_AL, + .base = addrlo, + .index = TCG_REG_R1, + .index_scratch = true, + }; +#else + if (a_mask) { + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7 */ + tcg_debug_assert(a_mask <= 0xff); + /* tst addr, #mask */ + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); + } + + *h = (HostAddress){ + .cond = COND_AL, + .base = addrlo, + .index = guest_base ? 
TCG_REG_GUEST_BASE : -1,
+        .index_scratch = false,
+    };
+#endif
+
+    return ldst;
+}
+
 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
                                    TCGReg datahi, HostAddress h)
 {
@@ -1799,37 +1785,28 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
                             MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
+    TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-#ifdef CONFIG_SOFTMMU
-    h.cond = COND_AL;
-    h.base = addrlo;
-    h.index_scratch = true;
-    h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
+    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+    if (ldst) {
+        ldst->type = data_type;
+        ldst->datalo_reg = datalo;
+        ldst->datahi_reg = datahi;
 
-    /*
-     * This a conditional BL only to load a pointer within this opcode into
-     * LR for the slow path. We will not be using the value for a tail call.
-     */
-    tcg_insn_unit *label_ptr = s->code_ptr;
-    tcg_out_bl_imm(s, COND_NE, 0);
+        /*
+         * This is a conditional BL only to load a pointer within this
+         * opcode into LR for the slow path. We will not be using
+         * the value for a tail call.
+         */
+        ldst->label_ptr[0] = s->code_ptr;
+        tcg_out_bl_imm(s, COND_NE, 0);
 
-    tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
-
-    add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
-                        addrlo, addrhi, s->code_ptr, label_ptr);
-#else
-    unsigned a_bits = get_alignment_bits(opc);
-    if (a_bits) {
-        tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
+        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
+        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+    } else {
+        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
     }
-
-    h.cond = COND_AL;
-    h.base = addrlo;
-    h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
-    h.index_scratch = false;
-    tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
-#endif
 }
 
 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
@@ -1891,35 +1868,25 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
                             MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
+    TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-#ifdef CONFIG_SOFTMMU
-    h.cond = COND_EQ;
-    h.base = addrlo;
-    h.index_scratch = true;
-    h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
-    tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
+    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+    if (ldst) {
+        ldst->type = data_type;
+        ldst->datalo_reg = datalo;
+        ldst->datahi_reg = datahi;
 
-    /* The conditional call must come last, as we're going to return here. */
-    tcg_insn_unit *label_ptr = s->code_ptr;
-    tcg_out_bl_imm(s, COND_NE, 0);
-
-    add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
-                        addrlo, addrhi, s->code_ptr, label_ptr);
-#else
-    unsigned a_bits = get_alignment_bits(opc);
-
-    h.cond = COND_AL;
-    if (a_bits) {
-        tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
         h.cond = COND_EQ;
-    }
+        tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
 
-    h.base = addrlo;
-    h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
-    h.index_scratch = false;
-    tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
-#endif
+        /* The conditional call is last, as we're going to return here.
*/ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_bl_imm(s, COND_NE, 0); + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } else { + tcg_out_qemu_st_direct(s, opc, datalo, datahi, h); + } } static void tcg_out_epilogue(TCGContext *s); From e63eed328f20e114c009f0dda98fdc6a58312e1c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Apr 2023 17:02:44 +0100 Subject: [PATCH 15/53] tcg/loongarch64: Introduce prepare_host_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment, tcg_out_zext_addr_if_32_bit, and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns HostAddress and TCGLabelQemuLdst structures. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 255 +++++++++++++------------------ 1 file changed, 105 insertions(+), 150 deletions(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 6a87a5e5a3..2f2c34b930 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -818,81 +818,12 @@ static void * const qemu_st_helpers[4] = { [MO_64] = helper_le_stq_mmu, }; -/* We expect to use a 12-bit negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); - static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) { tcg_out_opc_b(s, 0); return reloc_br_sd10k16(s->code_ptr - 1, target); } -/* - * Emits common code for TLB addend lookup, that eventually loads the - * addend in TCG_REG_TMP2. - */ -static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, MemOpIdx oi, - tcg_insn_unit **label_ptr, bool is_load) -{ - MemOp opc = get_memop(oi); - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - tcg_target_long compare_mask; - int mem_index = get_mmuidx(oi); - int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); - int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); - int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); - - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); - - tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); - tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); - - /* Load the tlb comparator and the addend. */ - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, - is_load ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, - offsetof(CPUTLBEntry, addend)); - - /* We don't support unaligned accesses. */ - if (a_bits < s_bits) { - a_bits = s_bits; - } - /* Clear the non-page, non-alignment bits from the address. */ - compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1); - tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask); - tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl); - - /* Compare masked address with the TLB entry. */ - label_ptr[0] = s->code_ptr; - tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); - - /* TLB Hit - addend in TCG_REG_TMP2, ready for use. 
*/ -} - -static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi, - TCGType type, - TCGReg datalo, TCGReg addrlo, - void *raddr, tcg_insn_unit **label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = type; - label->datalo_reg = datalo; - label->datahi_reg = 0; /* unused */ - label->addrlo_reg = addrlo; - label->addrhi_reg = 0; /* unused */ - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr[0]; -} - static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { MemOpIdx oi = l->oi; @@ -941,33 +872,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return tcg_out_goto(s, l->raddr); } #else - -/* - * Alignment helpers for user-mode emulation - */ - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, - unsigned a_bits) -{ - TCGLabelQemuLdst *l = new_ldst_label(s); - - l->is_ld = is_ld; - l->addrlo_reg = addr_reg; - - /* - * Without micro-architecture details, we don't know which of bstrpick or - * andi is faster, so use bstrpick as it's not constrained by imm field - * width. (Not to say alignments >= 2^12 are going to happen any time - * soon, though) - */ - tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); - - l->label_ptr[0] = s->code_ptr; - tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); - - l->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { /* resolve label address */ @@ -997,27 +901,102 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) #endif /* CONFIG_SOFTMMU */ -/* - * `ext32u` the address register into the temp register given, - * if target is 32-bit, no-op otherwise. - * - * Returns the address register ready for use with TLB addend. - */ -static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s, - TCGReg addr, TCGReg tmp) -{ - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, tmp, addr); - return tmp; - } - return addr; -} - typedef struct { TCGReg base; TCGReg index; } HostAddress; +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addr_reg, MemOpIdx oi, + bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + +#ifdef CONFIG_SOFTMMU + unsigned s_bits = opc & MO_SIZE; + int mem_index = get_mmuidx(oi); + int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); + int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); + int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); + tcg_target_long compare_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); + + tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); + tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); + + /* Load the tlb comparator and the addend. 
*/ + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, + is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, + offsetof(CPUTLBEntry, addend)); + + /* We don't support unaligned accesses. */ + if (a_bits < s_bits) { + a_bits = s_bits; + } + /* Clear the non-page, non-alignment bits from the address. */ + compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1); + tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask); + tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg); + + /* Compare masked address with the TLB entry. */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); + + h->index = TCG_REG_TMP2; +#else + if (a_bits) { + ldst = new_ldst_label(s); + + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + /* + * Without micro-architecture details, we don't know which of + * bstrpick or andi is faster, so use bstrpick as it's not + * constrained by imm field width. Not to say alignments >= 2^12 + * are going to happen any time soon. + */ + tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); + + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); + } + + h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; +#endif + + if (TARGET_LONG_BITS == 32) { + h->base = TCG_REG_TMP0; + tcg_out_ext32u(s, h->base, addr_reg); + } else { + h->base = addr_reg; + } + + return ldst; +} + static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type, TCGReg rd, HostAddress h) { @@ -1057,29 +1036,17 @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type, static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr[1]; + ldst = prepare_host_addr(s, &h, addr_reg, oi, true); + tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h); - tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1); - h.index = TCG_REG_TMP2; -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, true, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - h.index = USE_GUEST_BASE ? 
TCG_GUEST_BASE_REG : TCG_REG_ZERO; -#endif - - h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0); - tcg_out_qemu_ld_indexed(s, opc, data_type, data_reg, h); - -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#endif } static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, @@ -1109,29 +1076,17 @@ static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr[1]; + ldst = prepare_host_addr(s, &h, addr_reg, oi, false); + tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h); - tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0); - h.index = TCG_REG_TMP2; -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, false, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; -#endif - - h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0); - tcg_out_qemu_st_indexed(s, opc, data_reg, h); - -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#endif } /* From 5b7208daa0b35eae3b8adf5cabc695bb895caeba Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Apr 2023 18:34:35 +0100 Subject: [PATCH 16/53] tcg/mips: Introduce prepare_host_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment, and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns HostAddress and TCGLabelQemuLdst structures. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 404 ++++++++++++++++---------------------- 1 file changed, 172 insertions(+), 232 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index ef8350e9cd..94708e6ea7 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1181,120 +1181,6 @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) return i; } -/* We expect to use a 16-bit negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768); - -/* - * Perform the tlb comparison operation. - * The complete host address is placed in BASE. - * Clobbers TMP0, TMP1, TMP2, TMP3. - */ -static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, - TCGReg addrh, MemOpIdx oi, - tcg_insn_unit *label_ptr[2], bool is_load) -{ - MemOp opc = get_memop(oi); - unsigned a_bits = get_alignment_bits(opc); - unsigned s_bits = opc & MO_SIZE; - unsigned a_mask = (1 << a_bits) - 1; - unsigned s_mask = (1 << s_bits) - 1; - int mem_index = get_mmuidx(oi); - int fast_off = TLB_MASK_TABLE_OFS(mem_index); - int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); - int table_off = fast_off + offsetof(CPUTLBDescFast, table); - int add_off = offsetof(CPUTLBEntry, addend); - int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - target_ulong tlb_mask; - - /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. 
*/ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off); - - /* Extract the TLB index from the address into TMP3. */ - tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrl, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0); - - /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */ - tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1); - - /* Load the (low-half) tlb comparator. */ - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF); - } else { - tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD - : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW), - TCG_TMP0, TCG_TMP3, cmp_off); - } - - /* Zero extend a 32-bit guest address for a 64-bit host. */ - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, base, addrl); - addrl = base; - } - - /* - * Mask the page bits, keeping the alignment bits to compare against. - * For unaligned accesses, compare against the end of the access to - * verify that it does not cross a page boundary. - */ - tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; - tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask); - if (a_mask >= s_mask) { - tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); - } else { - tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask); - tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2); - } - - if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { - /* Load the tlb addend for the fast path. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); - } - - label_ptr[0] = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); - - /* Load and test the high half tlb comparator. */ - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - /* delay slot */ - tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF); - - /* Load the tlb addend for the fast path. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); - - label_ptr[1] = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0); - } - - /* delay slot */ - tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl); -} - -static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi, - TCGType ext, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addrhi, - void *raddr, tcg_insn_unit *label_ptr[2]) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = ext; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr[0]; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - label->label_ptr[1] = label_ptr[1]; - } -} - static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr); @@ -1403,32 +1289,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } #else - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, - TCGReg addrhi, unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *l = new_ldst_label(s); - - l->is_ld = is_ld; - l->addrlo_reg = addrlo; - l->addrhi_reg = addrhi; - - /* We are expecting a_bits to max out at 7, much lower than ANDI. 
*/ - tcg_debug_assert(a_bits < 16); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask); - - l->label_ptr[0] = s->code_ptr; - if (use_mips32r6_instructions) { - tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0); - } else { - tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO); - tcg_out_nop(s); - } - - l->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { void *target; @@ -1478,6 +1338,154 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } #endif /* SOFTMMU */ +typedef struct { + TCGReg base; + MemOp align; +} HostAddress; + +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addrlo, TCGReg addrhi, + MemOpIdx oi, bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + unsigned s_bits = opc & MO_SIZE; + unsigned a_mask = (1 << a_bits) - 1; + TCGReg base; + +#ifdef CONFIG_SOFTMMU + unsigned s_mask = (1 << s_bits) - 1; + int mem_index = get_mmuidx(oi); + int fast_off = TLB_MASK_TABLE_OFS(mem_index); + int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); + int table_off = fast_off + offsetof(CPUTLBDescFast, table); + int add_off = offsetof(CPUTLBEntry, addend); + int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write); + target_ulong tlb_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + base = TCG_REG_A0; + + /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off); + + /* Extract the TLB index from the address into TMP3. */ + tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0); + + /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */ + tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1); + + /* Load the (low-half) tlb comparator. */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF); + } else { + tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD + : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW), + TCG_TMP0, TCG_TMP3, cmp_off); + } + + /* Zero extend a 32-bit guest address for a 64-bit host. */ + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, base, addrlo); + addrlo = base; + } + + /* + * Mask the page bits, keeping the alignment bits to compare against. + * For unaligned accesses, compare against the end of the access to + * verify that it does not cross a page boundary. 
+ */ + tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; + tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask); + if (a_mask >= s_mask) { + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo); + } else { + tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrlo, s_mask - a_mask); + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2); + } + + if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { + /* Load the tlb addend for the fast path. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); + } + + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); + + /* Load and test the high half tlb comparator. */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + /* delay slot */ + tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF); + + /* Load the tlb addend for the fast path. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); + + ldst->label_ptr[1] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0); + } + + /* delay slot */ + tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrlo); +#else + if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) { + ldst = new_ldst_label(s); + + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7, much lower than ANDI. */ + tcg_debug_assert(a_bits < 16); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask); + + ldst->label_ptr[0] = s->code_ptr; + if (use_mips32r6_instructions) { + tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0); + } else { + tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO); + tcg_out_nop(s); + } + } + + base = addrlo; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_A0, base); + base = TCG_REG_A0; + } + if (guest_base) { + if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base); + } else { + tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base, + TCG_GUEST_BASE_REG); + } + base = TCG_REG_A0; + } +#endif + + h->base = base; + h->align = a_bits; + return ldst; +} + static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc, TCGType type) { @@ -1707,57 +1715,23 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, MemOpIdx oi, TCGType data_type) { MemOp opc = get_memop(oi); - unsigned a_bits = get_alignment_bits(opc); - unsigned s_bits = opc & MO_SIZE; - TCGReg base; + TCGLabelQemuLdst *ldst; + HostAddress h; - /* - * R6 removes the left/right instructions but requires the - * system to support misaligned memory accesses. 
- */ -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); - base = TCG_REG_A0; - tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 1); - if (use_mips32r6_instructions || a_bits >= s_bits) { - tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type); + if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) { + tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type); } else { - tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type); + tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type); } - add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#else - base = addrlo; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_A0, base); - base = TCG_REG_A0; + + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - if (guest_base) { - if (guest_base == (int16_t)guest_base) { - tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base); - } else { - tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base, - TCG_GUEST_BASE_REG); - } - base = TCG_REG_A0; - } - if (use_mips32r6_instructions) { - if (a_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); - } - tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type); - } else { - if (a_bits && a_bits != s_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); - } - if (a_bits >= s_bits) { - tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type); - } else { - tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type); - } - } -#endif } static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, @@ -1899,57 +1873,23 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, MemOpIdx oi, TCGType data_type) { MemOp opc = get_memop(oi); - unsigned a_bits = get_alignment_bits(opc); - unsigned s_bits = opc & MO_SIZE; - TCGReg base; + TCGLabelQemuLdst *ldst; + HostAddress h; - /* - * R6 removes the left/right instructions but requires the - * system to support misaligned memory accesses. 
- */ -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); - base = TCG_REG_A0; - tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 0); - if (use_mips32r6_instructions || a_bits >= s_bits) { - tcg_out_qemu_st_direct(s, datalo, datahi, base, opc); + if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) { + tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc); } else { - tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc); + tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc); } - add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#else - base = addrlo; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_A0, base); - base = TCG_REG_A0; + + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - if (guest_base) { - if (guest_base == (int16_t)guest_base) { - tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base); - } else { - tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base, - TCG_GUEST_BASE_REG); - } - base = TCG_REG_A0; - } - if (use_mips32r6_instructions) { - if (a_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); - } - tcg_out_qemu_st_direct(s, datalo, datahi, base, opc); - } else { - if (a_bits && a_bits != s_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); - } - if (a_bits >= s_bits) { - tcg_out_qemu_st_direct(s, datalo, datahi, base, opc); - } else { - tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc); - } - } -#endif } static void tcg_out_mb(TCGContext *s, TCGArg a0) From 7069e036999b78f83f0303e275c1c4413dfa10a3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Apr 2023 20:26:05 +0100 Subject: [PATCH 17/53] tcg/ppc: Introduce prepare_host_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment, and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns HostAddress and TCGLabelQemuLdst structures. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c.inc | 381 ++++++++++++++++++--------------------- 1 file changed, 172 insertions(+), 209 deletions(-) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index cd473deb36..0469e299a0 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2003,140 +2003,6 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = { [MO_BEUQ] = helper_be_stq_mmu, }; -/* We expect to use a 16-bit negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768); - -/* Perform the TLB load and compare. Places the result of the comparison - in CR7, loads the addend of the TLB into R3, and returns the register - containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ - -static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc, - TCGReg addrlo, TCGReg addrhi, - int mem_index, bool is_read) -{ - int cmp_off - = (is_read - ? 
offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - int fast_off = TLB_MASK_TABLE_OFS(mem_index); - int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); - int table_off = fast_off + offsetof(CPUTLBDescFast, table); - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - - /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off); - - /* Extract the page index, shifted into place for tlb index. */ - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_shri32(s, TCG_REG_TMP1, addrlo, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - } else { - tcg_out_shri64(s, TCG_REG_TMP1, addrlo, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - } - tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1)); - - /* Load the TLB comparator. */ - if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { - uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32 - ? LWZUX : LDUX); - tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4)); - } else { - tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4)); - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); - } else { - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); - } - } - - /* Load the TLB addend for use on the fast path. Do this asap - to minimize any load use delay. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, - offsetof(CPUTLBEntry, addend)); - - /* Clear the non-page, non-alignment bits from the address */ - if (TCG_TARGET_REG_BITS == 32) { - /* We don't support unaligned accesses on 32-bits. - * Preserve the bottom bits and thus trigger a comparison - * failure on unaligned accesses. - */ - if (a_bits < s_bits) { - a_bits = s_bits; - } - tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, - (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); - } else { - TCGReg t = addrlo; - - /* If the access is unaligned, we need to make sure we fail if we - * cross a page boundary. The trick is to add the access size-1 - * to the address before masking the low bits. That will make the - * address overflow to the next page if we cross a page boundary, - * which will then force a mismatch of the TLB compare. - */ - if (a_bits < s_bits) { - unsigned a_mask = (1 << a_bits) - 1; - unsigned s_mask = (1 << s_bits) - 1; - tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); - t = TCG_REG_R0; - } - - /* Mask the address for the requested alignment. */ - if (TARGET_LONG_BITS == 32) { - tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, - (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); - /* Zero-extend the address for use in the final address. 
*/ - tcg_out_ext32u(s, TCG_REG_R4, addrlo); - addrlo = TCG_REG_R4; - } else if (a_bits == 0) { - tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS); - } else { - tcg_out_rld(s, RLDICL, TCG_REG_R0, t, - 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits); - tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); - } - } - - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, - 0, 7, TCG_TYPE_I32); - tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); - tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); - } else { - tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, - 0, 7, TCG_TYPE_TL); - } - - return addrlo; -} - -/* Record the context of a call to the out of line helper code for the slow - path for a load or store, so that we can later generate the correct - helper code. */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, - TCGType type, MemOpIdx oi, - TCGReg datalo_reg, TCGReg datahi_reg, - TCGReg addrlo_reg, TCGReg addrhi_reg, - tcg_insn_unit *raddr, tcg_insn_unit *lptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->type = type; - label->oi = oi; - label->datalo_reg = datalo_reg; - label->datahi_reg = datahi_reg; - label->addrlo_reg = addrlo_reg; - label->addrhi_reg = addrhi_reg; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = lptr; -} - static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { MemOpIdx oi = lb->oi; @@ -2225,27 +2091,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) return true; } #else - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, - TCGReg addrhi, unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - - /* We are expecting a_bits to max out at 7, much lower than ANDI. */ - tcg_debug_assert(a_bits < 16); - tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask)); - - label->label_ptr[0] = s->code_ptr; - tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); - - label->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { @@ -2294,37 +2139,171 @@ typedef struct { TCGReg index; } HostAddress; +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addrlo, TCGReg addrhi, + MemOpIdx oi, bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + +#ifdef CONFIG_SOFTMMU + int mem_index = get_mmuidx(oi); + int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write); + int fast_off = TLB_MASK_TABLE_OFS(mem_index); + int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); + int table_off = fast_off + offsetof(CPUTLBDescFast, table); + unsigned s_bits = opc & MO_SIZE; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. 
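+     * The QEMU_BUILD_BUG_ON checks just below preserve the documented
+     * requirement of a 16-bit negative offset from ENV for these loads.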
*/ + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off); + + /* Extract the page index, shifted into place for tlb index. */ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_shri32(s, TCG_REG_TMP1, addrlo, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + } else { + tcg_out_shri64(s, TCG_REG_TMP1, addrlo, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + } + tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1)); + + /* Load the TLB comparator. */ + if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { + uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32 + ? LWZUX : LDUX); + tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4)); + } else { + tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4)); + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); + } else { + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); + } + } + + /* + * Load the TLB addend for use on the fast path. + * Do this asap to minimize any load use delay. + */ + h->base = TCG_REG_R3; + tcg_out_ld(s, TCG_TYPE_PTR, h->base, TCG_REG_R3, + offsetof(CPUTLBEntry, addend)); + + /* Clear the non-page, non-alignment bits from the address */ + if (TCG_TARGET_REG_BITS == 32) { + /* + * We don't support unaligned accesses on 32-bits. + * Preserve the bottom bits and thus trigger a comparison + * failure on unaligned accesses. + */ + if (a_bits < s_bits) { + a_bits = s_bits; + } + tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, + (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); + } else { + TCGReg t = addrlo; + + /* + * If the access is unaligned, we need to make sure we fail if we + * cross a page boundary. The trick is to add the access size-1 + * to the address before masking the low bits. That will make the + * address overflow to the next page if we cross a page boundary, + * which will then force a mismatch of the TLB compare. + */ + if (a_bits < s_bits) { + unsigned a_mask = (1 << a_bits) - 1; + unsigned s_mask = (1 << s_bits) - 1; + tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); + t = TCG_REG_R0; + } + + /* Mask the address for the requested alignment. */ + if (TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, + (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); + /* Zero-extend the address for use in the final address. */ + tcg_out_ext32u(s, TCG_REG_R4, addrlo); + addrlo = TCG_REG_R4; + } else if (a_bits == 0) { + tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS); + } else { + tcg_out_rld(s, RLDICL, TCG_REG_R0, t, + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + } + } + h->index = addrlo; + + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, + 0, 7, TCG_TYPE_I32); + tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); + tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + } else { + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, + 0, 7, TCG_TYPE_TL); + } + + /* Load a pointer into the current opcode w/conditional branch-link. 
*/ + ldst->label_ptr[0] = s->code_ptr; + tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); +#else + if (a_bits) { + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7, much lower than ANDI. */ + tcg_debug_assert(a_bits < 16); + tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1)); + + ldst->label_ptr[0] = s->code_ptr; + tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); + } + + h->base = guest_base ? TCG_GUEST_BASE_REG : 0; + h->index = addrlo; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); + h->index = TCG_REG_TMP1; + } +#endif + + return ldst; +} + static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, MemOpIdx oi, TCGType data_type) { MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); - h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true); - h.base = TCG_REG_R3; - - /* Load a pointer into the current opcode w/conditional branch-link. */ - label_ptr = s->code_ptr; - tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); -#else /* !CONFIG_SOFTMMU */ - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); - } - h.base = guest_base ? TCG_GUEST_BASE_REG : 0; - h.index = addrlo; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); - h.index = TCG_REG_TMP1; - } -#endif - - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { + if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { if (opc & MO_BSWAP) { tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); @@ -2357,10 +2336,12 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#endif + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } } static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, @@ -2368,32 +2349,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, MemOpIdx oi, TCGType data_type) { MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); - h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false); - h.base = TCG_REG_R3; - - /* Load a pointer into the current opcode w/conditional branch-link. */ - label_ptr = s->code_ptr; - tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); -#else /* !CONFIG_SOFTMMU */ - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); - } - h.base = guest_base ? 
TCG_GUEST_BASE_REG : 0; - h.index = addrlo; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); - h.index = TCG_REG_TMP1; - } -#endif - - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { + if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { if (opc & MO_BSWAP) { tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); @@ -2418,10 +2379,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#endif + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } } static void tcg_out_nop_fill(tcg_insn_unit *p, int count) From 001dddfe0ed9bbc099deab1fde829fc9292f4d25 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Apr 2023 21:06:38 +0100 Subject: [PATCH 18/53] tcg/riscv: Introduce prepare_host_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment, and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns TCGReg and TCGLabelQemuLdst. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 253 +++++++++++++++++-------------------- 1 file changed, 114 insertions(+), 139 deletions(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index a4cf60ca75..2b2d313fe2 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -899,10 +899,6 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = { #endif }; -/* We expect to use a 12-bit negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); - static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) { tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0); @@ -910,76 +906,6 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) tcg_debug_assert(ok); } -static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, MemOpIdx oi, - tcg_insn_unit **label_ptr, bool is_load) -{ - MemOp opc = get_memop(oi); - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - tcg_target_long compare_mask; - int mem_index = get_mmuidx(oi); - int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); - int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); - int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); - TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; - - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs); - - tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); - - /* Load the tlb comparator and the addend. */ - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, - is_load ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, - offsetof(CPUTLBEntry, addend)); - - /* We don't support unaligned accesses. */ - if (a_bits < s_bits) { - a_bits = s_bits; - } - /* Clear the non-page, non-alignment bits from the address. 
*/ - compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1); - if (compare_mask == sextreg(compare_mask, 0, 12)) { - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr, compare_mask); - } else { - tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask); - tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr); - } - - /* Compare masked address with the TLB entry. */ - label_ptr[0] = s->code_ptr; - tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0); - - /* TLB Hit - translate address using addend. */ - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_TMP0, addr); - addr = TCG_REG_TMP0; - } - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr); - return TCG_REG_TMP0; -} - -static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi, - TCGType data_type, TCGReg data_reg, - TCGReg addr_reg, void *raddr, - tcg_insn_unit **label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = data_type; - label->datalo_reg = data_reg; - label->addrlo_reg = addr_reg; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr[0]; -} - static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { MemOpIdx oi = l->oi; @@ -1037,26 +963,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return true; } #else - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, - unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *l = new_ldst_label(s); - - l->is_ld = is_ld; - l->addrlo_reg = addr_reg; - - /* We are expecting a_bits to max out at 7, so we can always use andi. */ - tcg_debug_assert(a_bits < 12); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask); - - l->label_ptr[0] = s->code_ptr; - tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0); - - l->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { /* resolve label address */ @@ -1083,9 +989,108 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { return tcg_out_fail_alignment(s, l); } - #endif /* CONFIG_SOFTMMU */ +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. 
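+ * For riscv the fast-path host address is a single base register,
+ * returned via @pbase rather than through a HostAddress structure.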
+ */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase, + TCGReg addr_reg, MemOpIdx oi, + bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + unsigned a_mask = (1u << a_bits) - 1; + +#ifdef CONFIG_SOFTMMU + unsigned s_bits = opc & MO_SIZE; + int mem_index = get_mmuidx(oi); + int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); + int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); + int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); + TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; + tcg_target_long compare_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs); + + tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); + + /* Load the tlb comparator and the addend. */ + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, + is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, + offsetof(CPUTLBEntry, addend)); + + /* We don't support unaligned accesses. */ + if (a_bits < s_bits) { + a_bits = s_bits; + } + /* Clear the non-page, non-alignment bits from the address. */ + compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask; + if (compare_mask == sextreg(compare_mask, 0, 12)) { + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask); + } else { + tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask); + tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg); + } + + /* Compare masked address with the TLB entry. */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0); + + /* TLB Hit - translate address using addend. */ + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg); + addr_reg = TCG_REG_TMP0; + } + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg); + *pbase = TCG_REG_TMP0; +#else + if (a_mask) { + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + /* We are expecting a_bits max 7, so we can always use andi. 
*/ + tcg_debug_assert(a_bits < 12); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask); + + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0); + } + + TCGReg base = addr_reg; + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_TMP0, base); + base = TCG_REG_TMP0; + } + if (guest_base != 0) { + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base); + base = TCG_REG_TMP0; + } + *pbase = base; +#endif + + return ldst; +} + static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val, TCGReg base, MemOp opc, TCGType type) { @@ -1125,32 +1130,17 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val, static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; TCGReg base; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[1]; + ldst = prepare_host_addr(s, &base, addr_reg, oi, true); + tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type); - base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1); - tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type); - add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, true, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - base = addr_reg; - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_TMP0, base); - base = TCG_REG_TMP0; - } - if (guest_base != 0) { - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base); - base = TCG_REG_TMP0; - } - tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type); -#endif } static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val, @@ -1180,32 +1170,17 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val, static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; TCGReg base; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[1]; + ldst = prepare_host_addr(s, &base, addr_reg, oi, false); + tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi)); - base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0); - tcg_out_qemu_st_direct(s, data_reg, base, opc); - add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, false, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - base = addr_reg; - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_TMP0, base); - base = TCG_REG_TMP0; - } - if (guest_base != 0) { - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base); - base = TCG_REG_TMP0; - } - tcg_out_qemu_st_direct(s, data_reg, base, opc); -#endif } static const tcg_insn_unit *tb_ret_addr; From 0741b25e4ee703c66b648eea5650ca833eae03db Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Apr 2023 21:46:46 +0100 Subject: [PATCH 19/53] tcg/s390x: Introduce prepare_host_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment, tcg_prepare_user_ldst, and some code that lived in both tcg_out_qemu_ld and 
tcg_out_qemu_st into one function that returns HostAddress and TCGLabelQemuLdst structures. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target.c.inc | 263 ++++++++++++++++--------------------- 1 file changed, 113 insertions(+), 150 deletions(-) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index da7ee5b085..c3157d22be 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1718,78 +1718,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data, } #if defined(CONFIG_SOFTMMU) -/* We're expecting to use a 20-bit negative offset on the tlb memory ops. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19)); - -/* Load and compare a TLB entry, leaving the flags set. Loads the TLB - addend into R2. Returns a register with the santitized guest address. */ -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, - int mem_index, bool is_ld) -{ - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - unsigned s_mask = (1 << s_bits) - 1; - unsigned a_mask = (1 << a_bits) - 1; - int fast_off = TLB_MASK_TABLE_OFS(mem_index); - int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); - int table_off = fast_off + offsetof(CPUTLBDescFast, table); - int ofs, a_off; - uint64_t tlb_mask; - - tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off); - tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off); - - /* For aligned accesses, we check the first byte and include the alignment - bits within the address. For unaligned access, we check that we don't - cross pages using the address of the last byte of the access. */ - a_off = (a_bits >= s_bits ? 
0 : s_mask - a_mask); - tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; - if (a_off == 0) { - tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); - } else { - tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); - tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); - } - - if (is_ld) { - ofs = offsetof(CPUTLBEntry, addr_read); - } else { - ofs = offsetof(CPUTLBEntry, addr_write); - } - if (TARGET_LONG_BITS == 32) { - tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs); - } else { - tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs); - } - - tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE, - offsetof(CPUTLBEntry, addend)); - - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_R3, addr_reg); - return TCG_REG_R3; - } - return addr_reg; -} - -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, - TCGType type, TCGReg data, TCGReg addr, - tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = type; - label->datalo_reg = data; - label->addrlo_reg = addr; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr; -} - static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg addr_reg = lb->addrlo_reg; @@ -1842,26 +1770,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) return true; } #else -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, - TCGReg addrlo, unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *l = new_ldst_label(s); - - l->is_ld = is_ld; - l->addrlo_reg = addrlo; - - /* We are expecting a_bits to max out at 7, much lower than TMLL. */ - tcg_debug_assert(a_bits < 16); - tcg_out_insn(s, RI, TMLL, addrlo, a_mask); - - tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */ - l->label_ptr[0] = s->code_ptr; - s->code_ptr += 1; - - l->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL, @@ -1888,91 +1796,146 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { return tcg_out_fail_alignment(s, l); } +#endif /* CONFIG_SOFTMMU */ -static HostAddress tcg_prepare_user_ldst(TCGContext *s, TCGReg addr_reg) +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. 
+ */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addr_reg, MemOpIdx oi, + bool is_ld) { - TCGReg index; - int disp; + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + unsigned a_mask = (1u << a_bits) - 1; +#ifdef CONFIG_SOFTMMU + unsigned s_bits = opc & MO_SIZE; + unsigned s_mask = (1 << s_bits) - 1; + int mem_index = get_mmuidx(oi); + int fast_off = TLB_MASK_TABLE_OFS(mem_index); + int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); + int table_off = fast_off + offsetof(CPUTLBDescFast, table); + int ofs, a_off; + uint64_t tlb_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19)); + tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off); + tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off); + + /* + * For aligned accesses, we check the first byte and include the alignment + * bits within the address. For unaligned access, we check that we don't + * cross pages using the address of the last byte of the access. + */ + a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask); + tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; + if (a_off == 0) { + tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); + } else { + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); + } + + if (is_ld) { + ofs = offsetof(CPUTLBEntry, addr_read); + } else { + ofs = offsetof(CPUTLBEntry, addr_write); + } + if (TARGET_LONG_BITS == 32) { + tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs); + } else { + tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs); + } + + tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); + ldst->label_ptr[0] = s->code_ptr++; + + h->index = TCG_REG_R2; + tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE, + offsetof(CPUTLBEntry, addend)); + + h->base = addr_reg; + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_R3, addr_reg); + h->base = TCG_REG_R3; + } + h->disp = 0; +#else + if (a_mask) { + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + /* We are expecting a_bits to max out at 7, much lower than TMLL. 
*/ + tcg_debug_assert(a_bits < 16); + tcg_out_insn(s, RI, TMLL, addr_reg, a_mask); + + tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */ + ldst->label_ptr[0] = s->code_ptr++; + } + + h->base = addr_reg; if (TARGET_LONG_BITS == 32) { tcg_out_ext32u(s, TCG_TMP0, addr_reg); - addr_reg = TCG_TMP0; + h->base = TCG_TMP0; } if (guest_base < 0x80000) { - index = TCG_REG_NONE; - disp = guest_base; + h->index = TCG_REG_NONE; + h->disp = guest_base; } else { - index = TCG_GUEST_BASE_REG; - disp = 0; + h->index = TCG_GUEST_BASE_REG; + h->disp = 0; } - return (HostAddress){ .base = addr_reg, .index = index, .disp = disp }; +#endif + + return ldst; } -#endif /* CONFIG_SOFTMMU */ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; + ldst = prepare_host_addr(s, &h, addr_reg, oi, true); + tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h); - h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); - h.index = TCG_REG_R2; - h.disp = 0; - - tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); - label_ptr = s->code_ptr; - s->code_ptr += 1; - - tcg_out_qemu_ld_direct(s, opc, data_reg, h); - - add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - - if (a_bits) { - tcg_out_test_alignment(s, true, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - h = tcg_prepare_user_ldst(s, addr_reg); - tcg_out_qemu_ld_direct(s, opc, data_reg, h); -#endif } static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; + ldst = prepare_host_addr(s, &h, addr_reg, oi, false); + tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h); - h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); - h.index = TCG_REG_R2; - h.disp = 0; - - tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); - label_ptr = s->code_ptr; - s->code_ptr += 1; - - tcg_out_qemu_st_direct(s, opc, data_reg, h); - - add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - - if (a_bits) { - tcg_out_test_alignment(s, false, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - h = tcg_prepare_user_ldst(s, addr_reg); - tcg_out_qemu_st_direct(s, opc, data_reg, h); -#endif } static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) From 8429a1ca8f315394b2a68b96812b1dc9c60070fd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 9 Apr 2023 22:59:09 -0700 Subject: [PATCH 20/53] tcg: Add routines for calling slow-path helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tcg_out_ld_helper_args, tcg_out_ld_helper_ret, and tcg_out_st_helper_args. These and their subroutines use the existing knowledge of the host function call abi to load the function call arguments and return results. These will be used to simplify the backends in turn. 
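
For reference, a converted backend's slow path then collapses to
roughly the following shape (a sketch modelled on the riscv
conversion later in this series, not a compilable excerpt; the temp
registers, relocation check and call emitter are per-backend):

    static const TCGLdstHelperParam ldst_helper_param = {
        .ntmp = 1, .tmp = { TCG_REG_TMP0 }
    };

    static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
    {
        MemOp opc = get_memop(l->oi);

        /* resolve label address, as before */
        if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
            return false;
        }

        /* marshal env, addr, oi and ra into the host calling convention */
        tcg_out_ld_helper_args(s, l, &ldst_helper_param);
        tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
        /* move/extend the helper return value into l->datalo_reg */
        tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);

        tcg_out_goto(s, l->raddr);
        return true;
    }
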
Reviewed-by: Alex Bennée
Signed-off-by: Richard Henderson
---
 tcg/tcg.c | 475 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 471 insertions(+), 4 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 057423c121..88fe01f59f 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -181,6 +181,22 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
 static int tcg_out_ldst_finalize(TCGContext *s);
 #endif
 
+typedef struct TCGLdstHelperParam {
+    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
+    unsigned ntmp;
+    int tmp[3];
+} TCGLdstHelperParam;
+
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
+                                   const TCGLdstHelperParam *p)
+    __attribute__((unused));
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
+                                  bool load_sign, const TCGLdstHelperParam *p)
+    __attribute__((unused));
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
+                                   const TCGLdstHelperParam *p)
+    __attribute__((unused));
+
 TCGContext tcg_init_ctx;
 __thread TCGContext *tcg_ctx;
 
@@ -366,8 +382,17 @@ void tcg_raise_tb_overflow(TCGContext *s)
     siglongjmp(s->jmp_trans, -2);
 }
 
+/*
+ * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
+ * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
+ *
+ * However, tcg_out_helper_load_slots reuses this field to hold an
+ * argument slot number (which may designate an argument register or an
+ * argument stack slot), converting to TCGReg once all arguments that
+ * are destined for the stack are processed.
+ */
 typedef struct TCGMovExtend {
-    TCGReg dst;
+    unsigned dst;
     TCGReg src;
     TCGType dst_type;
     TCGType src_type;
@@ -459,9 +484,8 @@ static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
  * between the sources and destinations.
  */
 
-static void __attribute__((unused))
-tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
-                const TCGMovExtend *i2, int scratch)
+static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
+                            const TCGMovExtend *i2, int scratch)
 {
     TCGReg src1 = i1->src;
     TCGReg src2 = i2->src;
@@ -715,6 +739,58 @@ static TCGHelperInfo all_helpers[] = {
 };
 static GHashTable *helper_table;
 
+/*
+ * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
+ * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
+ * We only use these for layout in tcg_out_ld_helper_ret and
+ * tcg_out_st_helper_args, and share them between several of
+ * the helpers, with the end result that it's easier to build manually.
+ */ + +#if TCG_TARGET_REG_BITS == 32 +# define dh_typecode_ttl dh_typecode_i32 +#else +# define dh_typecode_ttl dh_typecode_i64 +#endif + +static TCGHelperInfo info_helper_ld32_mmu = { + .flags = TCG_CALL_NO_WG, + .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */ + | dh_typemask(env, 1) + | dh_typemask(tl, 2) /* target_ulong addr */ + | dh_typemask(i32, 3) /* unsigned oi */ + | dh_typemask(ptr, 4) /* uintptr_t ra */ +}; + +static TCGHelperInfo info_helper_ld64_mmu = { + .flags = TCG_CALL_NO_WG, + .typemask = dh_typemask(i64, 0) /* return uint64_t */ + | dh_typemask(env, 1) + | dh_typemask(tl, 2) /* target_ulong addr */ + | dh_typemask(i32, 3) /* unsigned oi */ + | dh_typemask(ptr, 4) /* uintptr_t ra */ +}; + +static TCGHelperInfo info_helper_st32_mmu = { + .flags = TCG_CALL_NO_WG, + .typemask = dh_typemask(void, 0) + | dh_typemask(env, 1) + | dh_typemask(tl, 2) /* target_ulong addr */ + | dh_typemask(i32, 3) /* uint32_t data */ + | dh_typemask(i32, 4) /* unsigned oi */ + | dh_typemask(ptr, 5) /* uintptr_t ra */ +}; + +static TCGHelperInfo info_helper_st64_mmu = { + .flags = TCG_CALL_NO_WG, + .typemask = dh_typemask(void, 0) + | dh_typemask(env, 1) + | dh_typemask(tl, 2) /* target_ulong addr */ + | dh_typemask(i64, 3) /* uint64_t data */ + | dh_typemask(i32, 4) /* unsigned oi */ + | dh_typemask(ptr, 5) /* uintptr_t ra */ +}; + #ifdef CONFIG_TCG_INTERPRETER static ffi_type *typecode_to_ffi(int argmask) { @@ -1126,6 +1202,11 @@ static void tcg_context_init(unsigned max_cpus) (gpointer)&all_helpers[i]); } + init_call_layout(&info_helper_ld32_mmu); + init_call_layout(&info_helper_ld64_mmu); + init_call_layout(&info_helper_st32_mmu); + init_call_layout(&info_helper_st64_mmu); + #ifdef CONFIG_TCG_INTERPRETER init_ffi_layouts(); #endif @@ -5011,6 +5092,392 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) } } +/* + * Similarly for qemu_ld/st slow path helpers. + * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, + * using only the provided backend tcg_out_* functions. + */ + +static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) +{ + int ofs = arg_slot_stk_ofs(slot); + + /* + * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not + * require extension to uint64_t, adjust the address for uint32_t. + */ + if (HOST_BIG_ENDIAN && + TCG_TARGET_REG_BITS == 64 && + type == TCG_TYPE_I32) { + ofs += 4; + } + return ofs; +} + +static void tcg_out_helper_load_regs(TCGContext *s, + unsigned nmov, TCGMovExtend *mov, + unsigned ntmp, const int *tmp) +{ + switch (nmov) { + default: + /* The backend must have provided enough temps for the worst case. */ + tcg_debug_assert(ntmp + 1 >= nmov); + + for (unsigned i = nmov - 1; i >= 2; --i) { + TCGReg dst = mov[i].dst; + + for (unsigned j = 0; j < i; ++j) { + if (dst == mov[j].src) { + /* + * Conflict. + * Copy the source to a temporary, recurse for the + * remaining moves, perform the extension from our + * scratch on the way out. + */ + TCGReg scratch = tmp[--ntmp]; + tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src); + mov[i].src = scratch; + + tcg_out_helper_load_regs(s, i, mov, ntmp, tmp); + tcg_out_movext1(s, &mov[i]); + return; + } + } + + /* No conflicts: perform this move and continue. */ + tcg_out_movext1(s, &mov[i]); + } + /* fall through for the final two moves */ + + case 2: + tcg_out_movext2(s, mov, mov + 1, ntmp ? 
tmp[0] : -1); + return; + case 1: + tcg_out_movext1(s, mov); + return; + case 0: + g_assert_not_reached(); + } +} + +static void tcg_out_helper_load_slots(TCGContext *s, + unsigned nmov, TCGMovExtend *mov, + const TCGLdstHelperParam *parm) +{ + unsigned i; + + /* + * Start from the end, storing to the stack first. + * This frees those registers, so we need not consider overlap. + */ + for (i = nmov; i-- > 0; ) { + unsigned slot = mov[i].dst; + + if (arg_slot_reg_p(slot)) { + goto found_reg; + } + + TCGReg src = mov[i].src; + TCGType dst_type = mov[i].dst_type; + MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; + + /* The argument is going onto the stack; extend into scratch. */ + if ((mov[i].src_ext & MO_SIZE) != dst_mo) { + tcg_debug_assert(parm->ntmp != 0); + mov[i].dst = src = parm->tmp[0]; + tcg_out_movext1(s, &mov[i]); + } + + tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, + tcg_out_helper_stk_ofs(dst_type, slot)); + } + return; + + found_reg: + /* + * The remaining arguments are in registers. + * Convert slot numbers to argument registers. + */ + nmov = i + 1; + for (i = 0; i < nmov; ++i) { + mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; + } + tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp); +} + +static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, + TCGType type, tcg_target_long imm, + const TCGLdstHelperParam *parm) +{ + if (arg_slot_reg_p(slot)) { + tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); + } else { + int ofs = tcg_out_helper_stk_ofs(type, slot); + if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { + tcg_debug_assert(parm->ntmp != 0); + tcg_out_movi(s, type, parm->tmp[0], imm); + tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); + } + } +} + +static void tcg_out_helper_load_common_args(TCGContext *s, + const TCGLabelQemuLdst *ldst, + const TCGLdstHelperParam *parm, + const TCGHelperInfo *info, + unsigned next_arg) +{ + TCGMovExtend ptr_mov = { + .dst_type = TCG_TYPE_PTR, + .src_type = TCG_TYPE_PTR, + .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 + }; + const TCGCallArgumentLoc *loc = &info->in[0]; + TCGType type; + unsigned slot; + tcg_target_ulong imm; + + /* + * Handle env, which is always first. + */ + ptr_mov.dst = loc->arg_slot; + ptr_mov.src = TCG_AREG0; + tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); + + /* + * Handle oi. + */ + imm = ldst->oi; + loc = &info->in[next_arg]; + type = TCG_TYPE_I32; + switch (loc->kind) { + case TCG_CALL_ARG_NORMAL: + break; + case TCG_CALL_ARG_EXTEND_U: + case TCG_CALL_ARG_EXTEND_S: + /* No extension required for MemOpIdx. */ + tcg_debug_assert(imm <= INT32_MAX); + type = TCG_TYPE_REG; + break; + default: + g_assert_not_reached(); + } + tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); + next_arg++; + + /* + * Handle ra. 
+ */ + loc = &info->in[next_arg]; + slot = loc->arg_slot; + if (parm->ra_gen) { + int arg_reg = -1; + TCGReg ra_reg; + + if (arg_slot_reg_p(slot)) { + arg_reg = tcg_target_call_iarg_regs[slot]; + } + ra_reg = parm->ra_gen(s, ldst, arg_reg); + + ptr_mov.dst = slot; + ptr_mov.src = ra_reg; + tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); + } else { + imm = (uintptr_t)ldst->raddr; + tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); + } +} + +static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, + const TCGCallArgumentLoc *loc, + TCGType dst_type, TCGType src_type, + TCGReg lo, TCGReg hi) +{ + if (dst_type <= TCG_TYPE_REG) { + MemOp src_ext; + + switch (loc->kind) { + case TCG_CALL_ARG_NORMAL: + src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64; + break; + case TCG_CALL_ARG_EXTEND_U: + dst_type = TCG_TYPE_REG; + src_ext = MO_UL; + break; + case TCG_CALL_ARG_EXTEND_S: + dst_type = TCG_TYPE_REG; + src_ext = MO_SL; + break; + default: + g_assert_not_reached(); + } + + mov[0].dst = loc->arg_slot; + mov[0].dst_type = dst_type; + mov[0].src = lo; + mov[0].src_type = src_type; + mov[0].src_ext = src_ext; + return 1; + } + + assert(TCG_TARGET_REG_BITS == 32); + + mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; + mov[0].src = lo; + mov[0].dst_type = TCG_TYPE_I32; + mov[0].src_type = TCG_TYPE_I32; + mov[0].src_ext = MO_32; + + mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; + mov[1].src = hi; + mov[1].dst_type = TCG_TYPE_I32; + mov[1].src_type = TCG_TYPE_I32; + mov[1].src_ext = MO_32; + + return 2; +} + +static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, + const TCGLdstHelperParam *parm) +{ + const TCGHelperInfo *info; + const TCGCallArgumentLoc *loc; + TCGMovExtend mov[2]; + unsigned next_arg, nmov; + MemOp mop = get_memop(ldst->oi); + + switch (mop & MO_SIZE) { + case MO_8: + case MO_16: + case MO_32: + info = &info_helper_ld32_mmu; + break; + case MO_64: + info = &info_helper_ld64_mmu; + break; + default: + g_assert_not_reached(); + } + + /* Defer env argument. */ + next_arg = 1; + + loc = &info->in[next_arg]; + nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL, + ldst->addrlo_reg, ldst->addrhi_reg); + next_arg += nmov; + + tcg_out_helper_load_slots(s, nmov, mov, parm); + + /* No special attention for 32 and 64-bit return values. */ + tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL); + + tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); +} + +static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, + bool load_sign, + const TCGLdstHelperParam *parm) +{ + TCGMovExtend mov[2]; + + if (ldst->type <= TCG_TYPE_REG) { + MemOp mop = get_memop(ldst->oi); + + mov[0].dst = ldst->datalo_reg; + mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); + mov[0].dst_type = ldst->type; + mov[0].src_type = TCG_TYPE_REG; + + /* + * If load_sign, then we allowed the helper to perform the + * appropriate sign extension to tcg_target_ulong, and all + * we need now is a plain move. + * + * If they do not, then we expect the relevant extension + * instruction to be no more expensive than a move, and + * we thus save the icache etc by only using one of two + * helper functions. 
+         */
+        if (load_sign || !(mop & MO_SIGN)) {
+            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
+                mov[0].src_ext = MO_32;
+            } else {
+                mov[0].src_ext = MO_64;
+            }
+        } else {
+            mov[0].src_ext = mop & MO_SSIZE;
+        }
+        tcg_out_movext1(s, mov);
+    } else {
+        assert(TCG_TARGET_REG_BITS == 32);
+
+        mov[0].dst = ldst->datalo_reg;
+        mov[0].src =
+            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
+        mov[0].dst_type = TCG_TYPE_I32;
+        mov[0].src_type = TCG_TYPE_I32;
+        mov[0].src_ext = MO_32;
+
+        mov[1].dst = ldst->datahi_reg;
+        mov[1].src =
+            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
+        mov[1].dst_type = TCG_TYPE_REG;
+        mov[1].src_type = TCG_TYPE_REG;
+        mov[1].src_ext = MO_32;
+
+        tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
+    }
+}
+
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
+                                   const TCGLdstHelperParam *parm)
+{
+    const TCGHelperInfo *info;
+    const TCGCallArgumentLoc *loc;
+    TCGMovExtend mov[4];
+    TCGType data_type;
+    unsigned next_arg, nmov, n;
+    MemOp mop = get_memop(ldst->oi);
+
+    switch (mop & MO_SIZE) {
+    case MO_8:
+    case MO_16:
+    case MO_32:
+        info = &info_helper_st32_mmu;
+        data_type = TCG_TYPE_I32;
+        break;
+    case MO_64:
+        info = &info_helper_st64_mmu;
+        data_type = TCG_TYPE_I64;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Defer env argument. */
+    next_arg = 1;
+    nmov = 0;
+
+    /* Handle addr argument. */
+    loc = &info->in[next_arg];
+    n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
+                               ldst->addrlo_reg, ldst->addrhi_reg);
+    next_arg += n;
+    nmov += n;
+
+    /* Handle data argument. */
+    loc = &info->in[next_arg];
+    n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
+                               ldst->datalo_reg, ldst->datahi_reg);
+    next_arg += n;
+    nmov += n;
+    tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
+
+    tcg_out_helper_load_slots(s, nmov, mov, parm);
+    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
+}
+
 #ifdef CONFIG_PROFILER
 
 /* avoid copy/paste errors */

From da8ab70ad17762e1ad1c4de01f981adebd26f851 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sun, 9 Apr 2023 23:03:55 -0700
Subject: [PATCH 21/53] tcg/i386: Convert tcg_out_qemu_ld_slow_path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use tcg_out_ld_helper_args and tcg_out_ld_helper_ret.

Reviewed-by: Alex Bennée
Signed-off-by: Richard Henderson
---
 tcg/i386/tcg-target.c.inc | 71 +++++++++++++++------------------------
 1 file changed, 28 insertions(+), 43 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 18b0e7997d..3508b9cc6c 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1802,13 +1802,37 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
     [MO_BEUQ] = helper_be_stq_mmu,
 };
 
+/*
+ * Because i686 has no register parameters and because x86_64 has xchg
+ * to handle addr/data register overlap, we have placed all input arguments
+ * before we might need a scratch reg.
+ *
+ * Even then, a scratch is only needed for l->raddr. Rather than expose
+ * a general-purpose scratch when we don't actually know it's available,
+ * use the ra_gen hook to load into RAX if needed.
+ */ +#if TCG_TARGET_REG_BITS == 64 +static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) +{ + if (arg < 0) { + arg = TCG_REG_RAX; + } + tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr); + return arg; +} +static const TCGLdstHelperParam ldst_helper_param = { + .ra_gen = ldst_ra_gen +}; +#else +static const TCGLdstHelperParam ldst_helper_param = { }; +#endif + /* * Generate code for the slow path for a load at the end of block */ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); + MemOp opc = get_memop(l->oi); tcg_insn_unit **label_ptr = &l->label_ptr[0]; /* resolve label address */ @@ -1817,49 +1841,10 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); } - if (TCG_TARGET_REG_BITS == 32) { - int ofs = 0; - - tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); - ofs += 4; - - tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (TARGET_LONG_BITS == 64) { - tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs); - ofs += 4; - - tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs); - } else { - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], - l->addrlo_reg); - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi); - tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3], - (uintptr_t)l->raddr); - } - + tcg_out_ld_helper_args(s, l, &ldst_helper_param); tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param); - if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { - TCGMovExtend ext[2] = { - { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32, - .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, - { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32, - .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, - }; - tcg_out_movext2(s, &ext[0], &ext[1], -1); - } else { - tcg_out_movext(s, l->type, l->datalo_reg, - TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX); - } - - /* Jump to the code corresponding to next IR of qemu_st */ tcg_out_jmp(s, l->raddr); return true; } From 0036e54e7abfd8ea7f3aeb7db2daa23b25cc8094 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 9 Apr 2023 23:08:12 -0700 Subject: [PATCH 22/53] tcg/i386: Convert tcg_out_qemu_st_slow_path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_out_st_helper_args. This eliminates the use of a tail call to the store helper. This may or may not be an improvement, depending on the call/return branch prediction of the host microarchitecture. 
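
Schematically, the slow path epilogue changes from a tail call
(a sketch built from the removed and added lines below):

    /* old: push the return address so the helper returns to l->raddr */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);

to an ordinary call followed by a jump back to the fast path:

    tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    tcg_out_jmp(s, l->raddr);
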
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 57 +++------------------------------------ 1 file changed, 4 insertions(+), 53 deletions(-) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 3508b9cc6c..a01bfad773 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1854,11 +1854,8 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) */ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; + MemOp opc = get_memop(l->oi); tcg_insn_unit **label_ptr = &l->label_ptr[0]; - TCGReg retaddr; /* resolve label address */ tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); @@ -1866,56 +1863,10 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); } - if (TCG_TARGET_REG_BITS == 32) { - int ofs = 0; + tcg_out_st_helper_args(s, l, &ldst_helper_param); + tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); - tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); - ofs += 4; - - tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (TARGET_LONG_BITS == 64) { - tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (s_bits == MO_64) { - tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs); - ofs += 4; - - retaddr = TCG_REG_EAX; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs); - } else { - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], - l->addrlo_reg); - tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), - tcg_target_call_iarg_regs[2], l->datalo_reg); - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi); - - if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) { - retaddr = tcg_target_call_iarg_regs[4]; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - } else { - retaddr = TCG_REG_RAX; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, - TCG_TARGET_CALL_STACK_OFFSET); - } - } - - /* "Tail call" to the helper, with the return address back inline. */ - tcg_out_push(s, retaddr); - tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_jmp(s, l->raddr); return true; } #else From 6e96422b14a462da0089768baa91529f4192212c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 9 Apr 2023 23:21:20 -0700 Subject: [PATCH 23/53] tcg/aarch64: Convert tcg_out_qemu_{ld,st}_slow_path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret, and tcg_out_st_helper_args. 
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 40 +++++++++++++++--------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 202b90c001..62dd22d73c 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1580,13 +1580,6 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, } } -static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) -{ - ptrdiff_t offset = tcg_pcrel_diff(s, target); - tcg_debug_assert(offset == sextract64(offset, 0, 21)); - tcg_out_insn(s, 3406, ADR, rd, offset); -} - typedef struct { TCGReg base; TCGReg index; @@ -1627,47 +1620,46 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = { #endif }; +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 1, .tmp = { TCG_REG_TMP } +}; + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); + MemOp opc = get_memop(lb->oi); if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); - tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); - tcg_out_adr(s, TCG_REG_X3, lb->raddr); + tcg_out_ld_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); - - tcg_out_movext(s, lb->type, lb->datalo_reg, - TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0); + tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); tcg_out_goto(s, lb->raddr); return true; } static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); - MemOp size = opc & MO_SIZE; + MemOp opc = get_memop(lb->oi); if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); - tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); - tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); - tcg_out_adr(s, TCG_REG_X4, lb->raddr); + tcg_out_st_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]); tcg_out_goto(s, lb->raddr); return true; } #else +static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) +{ + ptrdiff_t offset = tcg_pcrel_diff(s, target); + tcg_debug_assert(offset == sextract64(offset, 0, 21)); + tcg_out_insn(s, 3406, ADR, rd, offset); +} + static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { From 74c17067329042eb48b0e972b8a9292c85f2d244 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 10 Apr 2023 00:15:24 -0700 Subject: [PATCH 24/53] tcg/arm: Convert tcg_out_qemu_{ld,st}_slow_path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret, and tcg_out_st_helper_args. This allows our local tcg_out_arg_* infrastructure to be removed. 
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 140 +++++---------------------------------- 1 file changed, 18 insertions(+), 122 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index c744512778..df514e56fc 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -690,8 +690,8 @@ tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm) tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1); } -static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, - TCGReg rn, int imm8) +static void __attribute__((unused)) +tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, int imm8) { tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0); } @@ -969,28 +969,16 @@ static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn) tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, 0xff); } -static void __attribute__((unused)) -tcg_out_ext8u_cond(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) -{ - tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff); -} - static void tcg_out_ext16s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn) { /* sxth */ tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | (rd << 12) | rn); } -static void tcg_out_ext16u_cond(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn) -{ - /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); -} - static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn) { - tcg_out_ext16u_cond(s, COND_AL, rd, rn); + /* uxth */ + tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | (rd << 12) | rn); } static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn) @@ -1382,92 +1370,29 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = { #endif }; -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * argreg is where we want to put this argument, arg is the argument itself. - * Return value is the updated argreg ready for the next call. - * Note that argreg 0..3 is real registers, 4+ on stack. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ -#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \ -static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \ -{ \ - if (argreg < 4) { \ - MOV_ARG(s, COND_AL, argreg, arg); \ - } else { \ - int ofs = (argreg - 4) * 4; \ - EXT_ARG; \ - tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \ - tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \ - } \ - return argreg + 1; \ -} - -DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32, - (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u_cond, - (tcg_out_ext8u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u_cond, - (tcg_out_ext16u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, ) - -static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, - TCGReg arglo, TCGReg arghi) +static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) { - /* 64 bit arguments must go in even/odd register pairs - * and in 8-aligned stack slots. 
- */ - if (argreg & 1) { - argreg++; - } - if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) { - tcg_out_strd_8(s, COND_AL, arglo, - TCG_REG_CALL_STACK, (argreg - 4) * 4); - return argreg + 2; - } else { - argreg = tcg_out_arg_reg32(s, argreg, arglo); - argreg = tcg_out_arg_reg32(s, argreg, arghi); - return argreg; - } + /* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */ + return TCG_REG_R14; } +static const TCGLdstHelperParam ldst_helper_param = { + .ra_gen = ldst_ra_gen, + .ntmp = 1, + .tmp = { TCG_REG_TMP }, +}; + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - TCGReg argreg; - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); + MemOp opc = get_memop(lb->oi); if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); - } else { - argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); - } - argreg = tcg_out_arg_imm32(s, argreg, oi); - argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); - - /* Use the canonical unsigned helpers and minimize icache usage. */ + tcg_out_ld_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); - - if ((opc & MO_SIZE) == MO_64) { - TCGMovExtend ext[2] = { - { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32, - .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, - { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32, - .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, - }; - tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP); - } else { - tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg, - TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0); - } + tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); tcg_out_goto(s, COND_AL, lb->raddr); return true; @@ -1475,42 +1400,13 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - TCGReg argreg, datalo, datahi; - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); + MemOp opc = get_memop(lb->oi); if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - argreg = TCG_REG_R0; - argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); - } else { - argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); - } - - datalo = lb->datalo_reg; - datahi = lb->datahi_reg; - switch (opc & MO_SIZE) { - case MO_8: - argreg = tcg_out_arg_reg8(s, argreg, datalo); - break; - case MO_16: - argreg = tcg_out_arg_reg16(s, argreg, datalo); - break; - case MO_32: - default: - argreg = tcg_out_arg_reg32(s, argreg, datalo); - break; - case MO_64: - argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi); - break; - } - - argreg = tcg_out_arg_imm32(s, argreg, oi); - argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); + tcg_out_st_helper_args(s, lb, &ldst_helper_param); /* Tail-call to the helper, which will return to the fast path. 
*/ tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]); From 338b9e09503714016f9c5c4b649b007e721ca057 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Apr 2023 03:54:06 +0100 Subject: [PATCH 25/53] tcg/loongarch64: Convert tcg_out_qemu_{ld,st}_slow_path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret, and tcg_out_st_helper_args. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 37 ++++++++++---------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 2f2c34b930..60d2c904dd 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -824,51 +824,36 @@ static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) return reloc_br_sd10k16(s->code_ptr - 1, target); } +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 1, .tmp = { TCG_REG_TMP0 } +}; + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp size = opc & MO_SIZE; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - /* call load helper */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr); - - tcg_out_call_int(s, qemu_ld_helpers[size], false); - - tcg_out_movext(s, l->type, l->datalo_reg, - TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_A0); + tcg_out_ld_helper_args(s, l, &ldst_helper_param); + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false); + tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param); return tcg_out_goto(s, l->raddr); } static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp size = opc & MO_SIZE; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - /* call store helper */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); - tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I32 : TCG_TYPE_I32, TCG_REG_A2, - l->type, size, l->datalo_reg); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr); - - tcg_out_call_int(s, qemu_st_helpers[size], false); - + tcg_out_st_helper_args(s, l, &ldst_helper_param); + tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); return tcg_out_goto(s, l->raddr); } #else From f07aaf4856ee642f9f566bc26d8547819871516f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 10 Apr 2023 10:30:21 -0700 Subject: [PATCH 26/53] tcg/mips: Convert tcg_out_qemu_{ld,st}_slow_path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret, and tcg_out_st_helper_args. This allows our local tcg_out_arg_* infrastructure to be removed. We are no longer filling the call or return branch delay slots, nor are we tail-calling for the store, but this seems a small price to pay. 
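
Concretely, where the old code used the delay slots for useful work
(e.g. loading env into the first argument register), the shared helpers
cannot emit into them, so the call sequence becomes (sketch matching
the store hunk below):

    tcg_out_st_helper_args(s, l, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false);
    /* delay slot */
    tcg_out_nop(s);
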
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 154 ++++++-------------------------------- 1 file changed, 22 insertions(+), 132 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 94708e6ea7..022960d79a 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1115,79 +1115,15 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = { [MO_BEUQ] = helper_be_stq_mmu, }; -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * I is where we want to put this argument, and is updated and returned - * for the next call. ARG is the argument itself. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ - -static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) -{ - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); - } else { - /* For N32 and N64, the initial offset is different. But there - we also have 8 argument register so we don't run out here. */ - tcg_debug_assert(TCG_TARGET_REG_BITS == 32); - tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); - } - return i + 1; -} - -static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) -{ - TCGReg tmp = TCG_TMP0; - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_ext8u(s, tmp, arg); - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) -{ - TCGReg tmp = TCG_TMP0; - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) -{ - TCGReg tmp = TCG_TMP0; - if (arg == 0) { - tmp = TCG_REG_ZERO; - } else { - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); - } - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) -{ - tcg_debug_assert(TCG_TARGET_REG_BITS == 32); - i = (i + 1) & ~1; - i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); - i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); - return i; -} +/* We have four temps, we might as well expose three of them. 
*/ +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 } +}; static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr); - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - TCGReg v0; - int i; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_pc16(l->label_ptr[0], tgt_rx) @@ -1196,29 +1132,13 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return false; } - i = 1; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); - } - i = tcg_out_call_iarg_imm(s, i, oi); - i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); + tcg_out_ld_helper_args(s, l, &ldst_helper_param); + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); /* delay slot */ - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_nop(s); - v0 = l->datalo_reg; - if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { - /* We eliminated V0 from the possible output registers, so it - cannot be clobbered here. So we must move V1 first. */ - if (MIPS_BE) { - tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); - v0 = l->datahi_reg; - } else { - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); - } - } + tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param); tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); if (!reloc_pc16(s->code_ptr - 1, l->raddr)) { @@ -1226,22 +1146,14 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* delay slot */ - if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) { - /* we always sign-extend 32-bit loads */ - tcg_out_ext32s(s, v0, TCG_REG_V0); - } else { - tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO); - } + tcg_out_nop(s); return true; } static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr); - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; - int i; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_pc16(l->label_ptr[0], tgt_rx) @@ -1250,41 +1162,19 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return false; } - i = 1; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); - } - switch (s_bits) { - case MO_8: - i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); - break; - case MO_16: - i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); - break; - case MO_32: - i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); - break; - case MO_64: - if (TCG_TARGET_REG_BITS == 32) { - i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); - } - break; - default: - g_assert_not_reached(); - } - i = tcg_out_call_iarg_imm(s, i, oi); + tcg_out_st_helper_args(s, l, &ldst_helper_param); - /* Tail call to the store helper. Thus force the return address - computation to take place in the return address register. 
*/ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); - i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); - tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); + tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false); /* delay slot */ - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_nop(s); + + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); + if (!reloc_pc16(s->code_ptr - 1, l->raddr)) { + return false; + } + + /* delay slot */ + tcg_out_nop(s); return true; } From ec3894191274df2b61219053010d93072ca5b9f2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 10 Apr 2023 11:09:42 -0700 Subject: [PATCH 27/53] tcg/ppc: Convert tcg_out_qemu_{ld,st}_slow_path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret, and tcg_out_st_helper_args. Reviewed-by: Alex Bennée Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c.inc | 88 ++++++++++++---------------------------- 1 file changed, 26 insertions(+), 62 deletions(-) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 0469e299a0..4c479fdece 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2003,44 +2003,38 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = { [MO_BEUQ] = helper_be_stq_mmu, }; +static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) +{ + if (arg < 0) { + arg = TCG_REG_TMP1; + } + tcg_out32(s, MFSPR | RT(arg) | LR); + return arg; +} + +/* + * For the purposes of ppc32 sorting 4 input registers into 4 argument + * registers, there is an outside chance we would require 3 temps. + * Because of constraints, no inputs are in r3, and env will not be + * placed into r3 until after the sorting is done, and is thus free. + */ +static const TCGLdstHelperParam ldst_helper_param = { + .ra_gen = ldst_ra_gen, + .ntmp = 3, + .tmp = { TCG_REG_TMP1, TCG_REG_R0, TCG_REG_R3 } +}; + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); - TCGReg hi, lo, arg = TCG_REG_R3; + MemOp opc = get_memop(lb->oi); if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); - - lo = lb->addrlo_reg; - hi = lb->addrhi_reg; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - } else { - /* If the address needed to be zero-extended, we'll have already - placed it in R4. The only remaining case is 64-bit guest. 
*/ - tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); - } - - tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); - tcg_out32(s, MFSPR | RT(arg) | LR); - + tcg_out_ld_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); - - lo = lb->datalo_reg; - hi = lb->datahi_reg; - if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { - tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4); - tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3); - } else { - tcg_out_movext(s, lb->type, lo, - TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_R3); - } + tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); tcg_out_b(s, 0, lb->raddr); return true; @@ -2048,43 +2042,13 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; - TCGReg hi, lo, arg = TCG_REG_R3; + MemOp opc = get_memop(lb->oi); if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); - - lo = lb->addrlo_reg; - hi = lb->addrhi_reg; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - } else { - /* If the address needed to be zero-extended, we'll have already - placed it in R4. The only remaining case is 64-bit guest. */ - tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); - } - - lo = lb->datalo_reg; - hi = lb->datahi_reg; - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - } else { - tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, - arg++, lb->type, s_bits, lo); - } - - tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); - tcg_out32(s, MFSPR | RT(arg) | LR); - + tcg_out_st_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_b(s, 0, lb->raddr); From 61b6daafb49148690fdc82e570b3daabfbaaf8a9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 10 Apr 2023 11:59:54 -0700 Subject: [PATCH 28/53] tcg/riscv: Convert tcg_out_qemu_{ld,st}_slow_path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret, and tcg_out_st_helper_args. Reviewed-by: Alex Bennée Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 2b2d313fe2..c22d1e35ac 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -906,14 +906,14 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) tcg_debug_assert(ok); } +/* We have three temps, we might as well expose them. 
*/ +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 } +}; + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - TCGReg a0 = tcg_target_call_iarg_regs[0]; - TCGReg a1 = tcg_target_call_iarg_regs[1]; - TCGReg a2 = tcg_target_call_iarg_regs[2]; - TCGReg a3 = tcg_target_call_iarg_regs[3]; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { @@ -921,13 +921,9 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* call load helper */ - tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg); - tcg_out_movi(s, TCG_TYPE_PTR, a2, oi); - tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr); - + tcg_out_ld_helper_args(s, l, &ldst_helper_param); tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false); - tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); + tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param); tcg_out_goto(s, l->raddr); return true; @@ -935,14 +931,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; - TCGReg a0 = tcg_target_call_iarg_regs[0]; - TCGReg a1 = tcg_target_call_iarg_regs[1]; - TCGReg a2 = tcg_target_call_iarg_regs[2]; - TCGReg a3 = tcg_target_call_iarg_regs[3]; - TCGReg a4 = tcg_target_call_iarg_regs[4]; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { @@ -950,13 +939,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* call store helper */ - tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg); - tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, a2, - l->type, s_bits, l->datalo_reg); - tcg_out_movi(s, TCG_TYPE_PTR, a3, oi); - tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr); - + tcg_out_st_helper_args(s, l, &ldst_helper_param); tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); tcg_out_goto(s, l->raddr); From eb491329f6eea979e95db191b5a4b44a09b008bf Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 10 Apr 2023 12:23:08 -0700 Subject: [PATCH 29/53] tcg/s390x: Convert tcg_out_qemu_{ld,st}_slow_path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret, and tcg_out_st_helper_args. 
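
Schematically, each converted slow path now reduces to the same shape.
The following is an outline, not the literal s390x code; register
names and the relocation step remain per backend:

    static const TCGLdstHelperParam ldst_helper_param = {
        .ntmp = 1, .tmp = { TCG_TMP0 }      /* backend scratch regs */
    };

    static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
    {
        MemOp opc = get_memop(lb->oi);

        /* resolve the label address (backend-specific relocation) */

        /* marshal (env, addr, oi, retaddr) into argument registers */
        tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
        /* emit the call to qemu_ld_helpers[...] */
        /* move/extend the helper result into lb->datalo/datahi */
        tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);

        /* branch back to lb->raddr */
        return true;
    }

The boolean passed to tcg_out_ld_helper_ret selects sign-extension of
32-bit results: true on hosts such as mips and riscv that keep 32-bit
values sign-extended in registers, false here.
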
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target.c.inc | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index c3157d22be..dfcf4d9e34 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1718,26 +1718,22 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data, } #if defined(CONFIG_SOFTMMU) +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 1, .tmp = { TCG_TMP0 } +}; + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - TCGReg addr_reg = lb->addrlo_reg; - TCGReg data_reg = lb->datalo_reg; - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); + MemOp opc = get_memop(lb->oi); if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); - } - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); - tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); + tcg_out_ld_helper_args(s, lb, &ldst_helper_param); + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); return true; @@ -1745,25 +1741,14 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - TCGReg addr_reg = lb->addrlo_reg; - TCGReg data_reg = lb->datalo_reg; - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); - MemOp size = opc & MO_SIZE; + MemOp opc = get_memop(lb->oi); if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); - } - tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, - TCG_REG_R4, lb->type, size, data_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); + tcg_out_st_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); From e3205306d89cd4abcd4d88cbdad38046554249db Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 3 Apr 2023 17:36:02 +0000 Subject: [PATCH 30/53] tcg/loongarch64: Simplify constraints on qemu_ld/st MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The softmmu tlb uses TCG_REG_TMP[0-2], not any of the normally available registers. Now that we handle overlap betwen inputs and helper arguments, we can allow any allocatable reg. 
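
Concretely, the two register-constraint definitions in
tcg-target-con-str.h collapse to one.  Shown side by side for
illustration, these are the masks this patch removes and keeps:

    /* before: qemu_ld/st operands had to avoid A0-A4, which double
       as helper arguments and tlb-lookup scratch */
    REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)

    /* after: the lookup uses only the reserved TCG_REG_TMP[0-2],
       so plain 'r' is safe for every qemu_ld/st operand */
    REGS('r', ALL_GENERAL_REGS)
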
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target-con-set.h | 2 -- tcg/loongarch64/tcg-target-con-str.h | 1 - tcg/loongarch64/tcg-target.c.inc | 23 ++++------------------- 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index 172c107289..c2bde44613 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -17,9 +17,7 @@ C_O0_I1(r) C_O0_I2(rZ, r) C_O0_I2(rZ, rZ) -C_O0_I2(LZ, L) C_O1_I1(r, r) -C_O1_I1(r, L) C_O1_I2(r, r, rC) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h index 541ff47fa9..6e9ccca3ad 100644 --- a/tcg/loongarch64/tcg-target-con-str.h +++ b/tcg/loongarch64/tcg-target-con-str.h @@ -14,7 +14,6 @@ * REGS(letter, register_mask) */ REGS('r', ALL_GENERAL_REGS) -REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) /* * Define constraint letters for constants: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 60d2c904dd..83fa45c802 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -133,18 +133,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define TCG_CT_CONST_C12 0x1000 #define TCG_CT_CONST_WSZ 0x2000 -#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) -/* - * For softmmu, we need to avoid conflicts with the first 5 - * argument registers to call the helper. Some of these are - * also used for the tlb lookup. - */ -#ifdef CONFIG_SOFTMMU -#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5) -#else -#define SOFTMMU_RESERVE_REGS 0 -#endif - +#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) { @@ -1541,16 +1530,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_st32_i64: case INDEX_op_st_i32: case INDEX_op_st_i64: + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: return C_O0_I2(rZ, r); case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(rZ, rZ); - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - return C_O0_I2(LZ, L); - case INDEX_op_ext8s_i32: case INDEX_op_ext8s_i64: case INDEX_op_ext8u_i32: @@ -1586,11 +1573,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_ld32u_i64: case INDEX_op_ld_i32: case INDEX_op_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: - return C_O1_I1(r, L); + return C_O1_I1(r, r); case INDEX_op_andc_i32: case INDEX_op_andc_i64: From a79956219f264945d06a540387a8f72f5ba4953b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 8 Apr 2023 06:15:41 +0100 Subject: [PATCH 31/53] tcg/mips: Remove MO_BSWAP handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While performing the load in the delay slot of the call to the common bswap helper function is cute, it is not worth the added complexity. 
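
With TCG_TARGET_HAS_MEMORY_BSWAP now 0, the middle end splits any
cross-endian access generically before it reaches the backend,
roughly:

    /* 32-bit load with MO_BSWAP set, expanded in tcg-op.c: */
    tcg_gen_qemu_ld_i32(val, addr, mem_idx, memop & ~MO_BSWAP);
    tcg_gen_bswap32_i32(val, val);

so the backend only ever sees host-order operations, and its helper
tables can be indexed by HOST_BIG_ENDIAN alone.
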
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 284 ++++++-------------------------------- tcg/mips/tcg-target.h | 4 +- 2 files changed, 48 insertions(+), 240 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 022960d79a..31d58e1977 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1088,31 +1088,35 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, } #if defined(CONFIG_SOFTMMU) -static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = { +static void * const qemu_ld_helpers[MO_SSIZE + 1] = { [MO_UB] = helper_ret_ldub_mmu, [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEUQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEUQ] = helper_be_ldq_mmu, -#if TCG_TARGET_REG_BITS == 64 - [MO_LESL] = helper_le_ldsl_mmu, - [MO_BESL] = helper_be_ldsl_mmu, +#if HOST_BIG_ENDIAN + [MO_UW] = helper_be_lduw_mmu, + [MO_SW] = helper_be_ldsw_mmu, + [MO_UL] = helper_be_ldul_mmu, + [MO_SL] = helper_be_ldsl_mmu, + [MO_UQ] = helper_be_ldq_mmu, +#else + [MO_UW] = helper_le_lduw_mmu, + [MO_SW] = helper_le_ldsw_mmu, + [MO_UL] = helper_le_ldul_mmu, + [MO_UQ] = helper_le_ldq_mmu, + [MO_SL] = helper_le_ldsl_mmu, #endif }; -static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = { +static void * const qemu_st_helpers[MO_SIZE + 1] = { [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEUQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEUQ] = helper_be_stq_mmu, +#if HOST_BIG_ENDIAN + [MO_UW] = helper_be_stw_mmu, + [MO_UL] = helper_be_stl_mmu, + [MO_UQ] = helper_be_stq_mmu, +#else + [MO_UW] = helper_le_stw_mmu, + [MO_UL] = helper_le_stl_mmu, + [MO_UQ] = helper_le_stq_mmu, +#endif }; /* We have four temps, we might as well expose three of them. 
*/ @@ -1134,7 +1138,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_ld_helper_args(s, l, &ldst_helper_param); - tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false); /* delay slot */ tcg_out_nop(s); @@ -1164,7 +1168,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_st_helper_args(s, l, &ldst_helper_param); - tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false); + tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); /* delay slot */ tcg_out_nop(s); @@ -1379,52 +1383,19 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc, TCGType type) { - switch (opc & (MO_SSIZE | MO_BSWAP)) { + switch (opc & MO_SSIZE) { case MO_UB: tcg_out_opc_imm(s, OPC_LBU, lo, base, 0); break; case MO_SB: tcg_out_opc_imm(s, OPC_LB, lo, base, 0); break; - case MO_UW | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ); - break; case MO_UW: tcg_out_opc_imm(s, OPC_LHU, lo, base, 0); break; - case MO_SW | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS); - break; case MO_SW: tcg_out_opc_imm(s, OPC_LH, lo, base, 0); break; - case MO_UL | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) { - if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, OPC_LWU, lo, base, 0); - tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ); - } else { - tcg_out_bswap_subr(s, bswap32u_addr); - /* delay slot */ - tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0); - tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3); - } - break; - } - /* FALLTHRU */ - case MO_SL | MO_BSWAP: - if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, OPC_LW, lo, base, 0); - tcg_out_bswap32(s, lo, lo, 0); - } else { - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot */ - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0); - tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_TMP3); - } - break; case MO_UL: if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) { tcg_out_opc_imm(s, OPC_LWU, lo, base, 0); @@ -1434,35 +1405,6 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, case MO_SL: tcg_out_opc_imm(s, OPC_LW, lo, base, 0); break; - case MO_UQ | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 64) { - if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, OPC_LD, lo, base, 0); - tcg_out_bswap64(s, lo, lo); - } else { - tcg_out_bswap_subr(s, bswap64_addr); - /* delay slot */ - tcg_out_opc_imm(s, OPC_LD, TCG_TMP0, base, 0); - tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3); - } - } else if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0); - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1); - tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16); - tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16); - } else { - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot */ - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0); - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 4); - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot */ - tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3); - tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? 
hi : lo, TCG_TMP3); - } - break; case MO_UQ: /* Prefer to load from offset 0 first, but allow for overlap. */ if (TCG_TARGET_REG_BITS == 64) { @@ -1487,25 +1429,20 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL; const MIPSInsn ld1 = MIPS_BE ? OPC_LDL : OPC_LDR; const MIPSInsn ld2 = MIPS_BE ? OPC_LDR : OPC_LDL; + bool sgn = opc & MO_SIGN; - bool sgn = (opc & MO_SIGN); - - switch (opc & (MO_SSIZE | MO_BSWAP)) { - case MO_SW | MO_BE: - case MO_UW | MO_BE: - tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0); - tcg_out_opc_imm(s, OPC_LBU, lo, base, 1); - if (use_mips32r2_instructions) { - tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8); - } else { - tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); - tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1); - } - break; - - case MO_SW | MO_LE: - case MO_UW | MO_LE: - if (use_mips32r2_instructions && lo != base) { + switch (opc & MO_SIZE) { + case MO_16: + if (HOST_BIG_ENDIAN) { + tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0); + tcg_out_opc_imm(s, OPC_LBU, lo, base, 1); + if (use_mips32r2_instructions) { + tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8); + } else { + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + tcg_out_opc_reg(s, OPC_OR, lo, lo, TCG_TMP0); + } + } else if (use_mips32r2_instructions && lo != base) { tcg_out_opc_imm(s, OPC_LBU, lo, base, 0); tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 1); tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8); @@ -1517,8 +1454,7 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, } break; - case MO_SL: - case MO_UL: + case MO_32: tcg_out_opc_imm(s, lw1, lo, base, 0); tcg_out_opc_imm(s, lw2, lo, base, 3); if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn) { @@ -1526,28 +1462,7 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, } break; - case MO_UL | MO_BSWAP: - case MO_SL | MO_BSWAP: - if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, lw1, lo, base, 0); - tcg_out_opc_imm(s, lw2, lo, base, 3); - tcg_out_bswap32(s, lo, lo, - TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 - ? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0); - } else { - const tcg_insn_unit *subr = - (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn - ? 
bswap32u_addr : bswap32_addr); - - tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0); - tcg_out_bswap_subr(s, subr); - /* delay slot */ - tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3); - tcg_out_mov(s, type, lo, TCG_TMP3); - } - break; - - case MO_UQ: + case MO_64: if (TCG_TARGET_REG_BITS == 64) { tcg_out_opc_imm(s, ld1, lo, base, 0); tcg_out_opc_imm(s, ld2, lo, base, 7); @@ -1559,42 +1474,6 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, } break; - case MO_UQ | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 64) { - if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, ld1, lo, base, 0); - tcg_out_opc_imm(s, ld2, lo, base, 7); - tcg_out_bswap64(s, lo, lo); - } else { - tcg_out_opc_imm(s, ld1, TCG_TMP0, base, 0); - tcg_out_bswap_subr(s, bswap64_addr); - /* delay slot */ - tcg_out_opc_imm(s, ld2, TCG_TMP0, base, 7); - tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3); - } - } else if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0); - tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3); - tcg_out_opc_imm(s, lw1, TCG_TMP1, base, 4 + 0); - tcg_out_opc_imm(s, lw2, TCG_TMP1, base, 4 + 3); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1); - tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16); - tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16); - } else { - tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0); - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot */ - tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3); - tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 4 + 0); - tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3); - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot */ - tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 4 + 3); - tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3); - } - break; - default: g_assert_not_reached(); } @@ -1627,50 +1506,16 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc) { - /* Don't clutter the code below with checks to avoid bswapping ZERO. */ - if ((lo | hi) == 0) { - opc &= ~MO_BSWAP; - } - - switch (opc & (MO_SIZE | MO_BSWAP)) { + switch (opc & MO_SIZE) { case MO_8: tcg_out_opc_imm(s, OPC_SB, lo, base, 0); break; - - case MO_16 | MO_BSWAP: - tcg_out_bswap16(s, TCG_TMP1, lo, 0); - lo = TCG_TMP1; - /* FALLTHRU */ case MO_16: tcg_out_opc_imm(s, OPC_SH, lo, base, 0); break; - - case MO_32 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP3, lo, 0); - lo = TCG_TMP3; - /* FALLTHRU */ case MO_32: tcg_out_opc_imm(s, OPC_SW, lo, base, 0); break; - - case MO_64 | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_bswap64(s, TCG_TMP3, lo); - tcg_out_opc_imm(s, OPC_SD, TCG_TMP3, base, 0); - } else if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo); - tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16); - tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4); - } else { - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0); - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? 
hi : lo, 0); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4); - } - break; case MO_64: if (TCG_TARGET_REG_BITS == 64) { tcg_out_opc_imm(s, OPC_SD, lo, base, 0); @@ -1679,7 +1524,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4); } break; - default: g_assert_not_reached(); } @@ -1693,54 +1537,18 @@ static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, const MIPSInsn sd1 = MIPS_BE ? OPC_SDL : OPC_SDR; const MIPSInsn sd2 = MIPS_BE ? OPC_SDR : OPC_SDL; - /* Don't clutter the code below with checks to avoid bswapping ZERO. */ - if ((lo | hi) == 0) { - opc &= ~MO_BSWAP; - } - - switch (opc & (MO_SIZE | MO_BSWAP)) { - case MO_16 | MO_BE: + switch (opc & MO_SIZE) { + case MO_16: tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8); - tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 0); - tcg_out_opc_imm(s, OPC_SB, lo, base, 1); + tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? TCG_TMP0 : lo, base, 0); + tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? lo : TCG_TMP0, base, 1); break; - case MO_16 | MO_LE: - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8); - tcg_out_opc_imm(s, OPC_SB, lo, base, 0); - tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 1); - break; - - case MO_32 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP3, lo, 0); - lo = TCG_TMP3; - /* fall through */ case MO_32: tcg_out_opc_imm(s, sw1, lo, base, 0); tcg_out_opc_imm(s, sw2, lo, base, 3); break; - case MO_64 | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_bswap64(s, TCG_TMP3, lo); - lo = TCG_TMP3; - } else if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? hi : lo); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? lo : hi); - tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16); - tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16); - hi = MIPS_BE ? TCG_TMP0 : TCG_TMP1; - lo = MIPS_BE ? TCG_TMP1 : TCG_TMP0; - } else { - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0); - tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 0 + 0); - tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 0 + 3); - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0); - tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 4 + 0); - tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 4 + 3); - break; - } - /* fall through */ case MO_64: if (TCG_TARGET_REG_BITS == 64) { tcg_out_opc_imm(s, sd1, lo, base, 0); diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 2431fc5353..42bd7fff01 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -204,8 +204,8 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */ #endif -#define TCG_TARGET_DEFAULT_MO (0) -#define TCG_TARGET_HAS_MEMORY_BSWAP 1 +#define TCG_TARGET_DEFAULT_MO 0 +#define TCG_TARGET_HAS_MEMORY_BSWAP 0 #define TCG_TARGET_NEED_LDST_LABELS From 2f2a3d1d0b11416aa39cd6127d9632318c7e345b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Apr 2023 04:29:40 +0100 Subject: [PATCH 32/53] tcg/mips: Reorg tlb load within prepare_host_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compare the address vs the tlb entry with sign-extended values. This simplifies the page+alignment mask constant, and the generation of the last byte address for the misaligned test. Move the tlb addend load up, and the zero-extension down. This frees up a register, which allows us use TMP3 as the returned base address register instead of A0, which we were using as a 5th temporary. 
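
As a worked example, take 4 KiB pages and an unaligned 8-byte access
(a_bits = 0, so a_mask = 0 and s_mask = 7).  With both sides of the
comparison sign-extended, the fast-path check becomes:

    mask = TARGET_PAGE_MASK | a_mask;   /* 0xffff...fffff000 */
    last = addrlo + (s_mask - a_mask);  /* address of final byte */
    hit  = (last & mask) == tlb_comparator;

If the access crosses a page, 'last' falls on the next page, the
masked value no longer matches the comparator, and we take the slow
path.  Comparing zero-extended values would instead need a mask whose
width depends on the guest, e.g. 0x00000000fffff000 for a 32-bit
guest on a 64-bit host, which is more expensive to materialize.
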
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 31d58e1977..695c137023 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -370,6 +370,8 @@ typedef enum { ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU, ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32 ? OPC_SRL : OPC_DSRL, + ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32 + ? OPC_ADDIU : OPC_DADDIU, } MIPSInsn; /* @@ -1263,14 +1265,12 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, int add_off = offsetof(CPUTLBEntry, addend); int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write); - target_ulong tlb_mask; ldst = new_ldst_label(s); ldst->is_ld = is_ld; ldst->oi = oi; ldst->addrlo_reg = addrlo; ldst->addrhi_reg = addrhi; - base = TCG_REG_A0; /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); @@ -1290,15 +1290,12 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF); } else { - tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD - : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW), - TCG_TMP0, TCG_TMP3, cmp_off); + tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off); } - /* Zero extend a 32-bit guest address for a 64-bit host. */ - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, base, addrlo); - addrlo = base; + if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { + /* Load the tlb addend for the fast path. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off); } /* @@ -1306,18 +1303,18 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, * For unaligned accesses, compare against the end of the access to * verify that it does not cross a page boundary. */ - tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; - tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask); - if (a_mask >= s_mask) { - tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo); - } else { - tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrlo, s_mask - a_mask); + tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask); + if (a_mask < s_mask) { + tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask); tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2); + } else { + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo); } - if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { - /* Load the tlb addend for the fast path. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); + /* Zero extend a 32-bit guest address for a 64-bit host. */ + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_TMP2, addrlo); + addrlo = TCG_TMP2; } ldst->label_ptr[0] = s->code_ptr; @@ -1329,14 +1326,15 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF); /* Load the tlb addend for the fast path. 
*/ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off); ldst->label_ptr[1] = s->code_ptr; tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0); } /* delay slot */ - tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrlo); + base = TCG_TMP3; + tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo); #else if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) { ldst = new_ldst_label(s); From f48cc9020b6a1b41f7ccc639a09fecc5a6b42ad1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 2 Apr 2023 22:06:53 -0700 Subject: [PATCH 33/53] tcg/mips: Simplify constraints on qemu_ld/st MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The softmmu tlb uses TCG_REG_TMP[0-3], not any of the normally available registers. Now that we handle overlap betwen inputs and helper arguments, and have eliminated use of A0, we can allow any allocatable reg. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/mips/tcg-target-con-set.h | 13 +++++-------- tcg/mips/tcg-target-con-str.h | 2 -- tcg/mips/tcg-target.c.inc | 30 ++++++++---------------------- 3 files changed, 13 insertions(+), 32 deletions(-) diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h index fe3e868a2f..864034f468 100644 --- a/tcg/mips/tcg-target-con-set.h +++ b/tcg/mips/tcg-target-con-set.h @@ -12,15 +12,13 @@ C_O0_I1(r) C_O0_I2(rZ, r) C_O0_I2(rZ, rZ) -C_O0_I2(SZ, S) -C_O0_I3(SZ, S, S) -C_O0_I3(SZ, SZ, S) +C_O0_I3(rZ, r, r) +C_O0_I3(rZ, rZ, r) C_O0_I4(rZ, rZ, rZ, rZ) -C_O0_I4(SZ, SZ, S, S) -C_O1_I1(r, L) +C_O0_I4(rZ, rZ, r, r) C_O1_I1(r, r) C_O1_I2(r, 0, rZ) -C_O1_I2(r, L, L) +C_O1_I2(r, r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) C_O1_I2(r, r, rIK) @@ -30,7 +28,6 @@ C_O1_I2(r, rZ, rN) C_O1_I2(r, rZ, rZ) C_O1_I4(r, rZ, rZ, rZ, 0) C_O1_I4(r, rZ, rZ, rZ, rZ) -C_O2_I1(r, r, L) -C_O2_I2(r, r, L, L) +C_O2_I1(r, r, r) C_O2_I2(r, r, r, r) C_O2_I4(r, r, rZ, rZ, rN, rN) diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h index e4b2965c72..413c280a7a 100644 --- a/tcg/mips/tcg-target-con-str.h +++ b/tcg/mips/tcg-target-con-str.h @@ -9,8 +9,6 @@ * REGS(letter, register_mask) */ REGS('r', ALL_GENERAL_REGS) -REGS('L', ALL_QLOAD_REGS) -REGS('S', ALL_QSTORE_REGS) /* * Define constraint letters for constants: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 695c137023..5ad9867882 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -176,20 +176,6 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, #define TCG_CT_CONST_WSZ 0x2000 /* word size */ #define ALL_GENERAL_REGS 0xffffffffu -#define NOA0_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_A0)) - -#ifdef CONFIG_SOFTMMU -#define ALL_QLOAD_REGS \ - (NOA0_REGS & ~((TCG_TARGET_REG_BITS < TARGET_LONG_BITS) << TCG_REG_A2)) -#define ALL_QSTORE_REGS \ - (NOA0_REGS & ~(TCG_TARGET_REG_BITS < TARGET_LONG_BITS \ - ? (1 << TCG_REG_A2) | (1 << TCG_REG_A3) \ - : (1 << TCG_REG_A1))) -#else -#define ALL_QLOAD_REGS NOA0_REGS -#define ALL_QSTORE_REGS NOA0_REGS -#endif - static bool is_p2m1(tcg_target_long val) { @@ -2232,18 +2218,18 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_qemu_ld_i32: return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32 - ? C_O1_I1(r, L) : C_O1_I2(r, L, L)); + ? C_O1_I1(r, r) : C_O1_I2(r, r, r)); case INDEX_op_qemu_st_i32: return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32 - ? C_O0_I2(SZ, S) : C_O0_I3(SZ, S, S)); + ? 
C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r)); case INDEX_op_qemu_ld_i64: - return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) - : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, L) - : C_O2_I2(r, r, L, L)); + return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) + : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r) + : C_O2_I2(r, r, r, r)); case INDEX_op_qemu_st_i64: - return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(SZ, S) - : TARGET_LONG_BITS == 32 ? C_O0_I3(SZ, SZ, S) - : C_O0_I4(SZ, SZ, S, S)); + return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) + : TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r) + : C_O0_I4(rZ, rZ, r, r)); default: g_assert_not_reached(); From 01a112e2e9f8c969b3d190e1fd2faf24644a771c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 4 Apr 2023 01:25:06 +0300 Subject: [PATCH 34/53] tcg/ppc: Reorg tcg_out_tlb_read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocate TCG_REG_TMP2. Use R0, TMP1, TMP2 instead of any of the normally allocated registers for the tlb load. Reviewed-by: Alex Bennée Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c.inc | 78 ++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 4c479fdece..dbba304e42 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -68,6 +68,7 @@ #else # define TCG_REG_TMP1 TCG_REG_R12 #endif +#define TCG_REG_TMP2 TCG_REG_R11 #define TCG_VEC_TMP1 TCG_REG_V0 #define TCG_VEC_TMP2 TCG_REG_V1 @@ -2015,13 +2016,11 @@ static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) /* * For the purposes of ppc32 sorting 4 input registers into 4 argument * registers, there is an outside chance we would require 3 temps. - * Because of constraints, no inputs are in r3, and env will not be - * placed into r3 until after the sorting is done, and is thus free. */ static const TCGLdstHelperParam ldst_helper_param = { .ra_gen = ldst_ra_gen, .ntmp = 3, - .tmp = { TCG_REG_TMP1, TCG_REG_R0, TCG_REG_R3 } + .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 } }; static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) @@ -2135,31 +2134,31 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off); /* Extract the page index, shifted into place for tlb index. */ if (TCG_TARGET_REG_BITS == 32) { - tcg_out_shri32(s, TCG_REG_TMP1, addrlo, + tcg_out_shri32(s, TCG_REG_R0, addrlo, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); } else { - tcg_out_shri64(s, TCG_REG_TMP1, addrlo, + tcg_out_shri64(s, TCG_REG_R0, addrlo, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); } - tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1)); + tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); - /* Load the TLB comparator. */ + /* Load the (low part) TLB comparator into TMP2. */ if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32 ? 
LWZUX : LDUX); - tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4)); + tcg_out32(s, lxu | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2)); } else { - tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4)); + tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, + TCG_REG_TMP1, cmp_off + 4 * HOST_BIG_ENDIAN); } else { - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off); } } @@ -2167,11 +2166,12 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, * Load the TLB addend for use on the fast path. * Do this asap to minimize any load use delay. */ - h->base = TCG_REG_R3; - tcg_out_ld(s, TCG_TYPE_PTR, h->base, TCG_REG_R3, - offsetof(CPUTLBEntry, addend)); + if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, + offsetof(CPUTLBEntry, addend)); + } - /* Clear the non-page, non-alignment bits from the address */ + /* Clear the non-page, non-alignment bits from the address in R0. */ if (TCG_TARGET_REG_BITS == 32) { /* * We don't support unaligned accesses on 32-bits. @@ -2204,9 +2204,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, if (TARGET_LONG_BITS == 32) { tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); - /* Zero-extend the address for use in the final address. */ - tcg_out_ext32u(s, TCG_REG_R4, addrlo); - addrlo = TCG_REG_R4; } else if (a_bits == 0) { tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS); } else { @@ -2215,21 +2212,36 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); } } - h->index = addrlo; if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, + /* Low part comparison into cr7. */ + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 7, TCG_TYPE_I32); - tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); + + /* Load the high part TLB comparator into TMP2. */ + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, + cmp_off + 4 * !HOST_BIG_ENDIAN); + + /* Load addend, deferred for this case. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, + offsetof(CPUTLBEntry, addend)); + + /* High part comparison into cr6. */ + tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32); + + /* Combine comparisons into cr7. */ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); } else { - tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, + /* Full comparison into cr7. */ + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 7, TCG_TYPE_TL); } /* Load a pointer into the current opcode w/conditional branch-link. */ ldst->label_ptr[0] = s->code_ptr; tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + + h->base = TCG_REG_TMP1; #else if (a_bits) { ldst = new_ldst_label(s); @@ -2247,13 +2259,16 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, } h->base = guest_base ? 
TCG_GUEST_BASE_REG : 0; - h->index = addrlo; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); - h->index = TCG_REG_TMP1; - } #endif + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + /* Zero-extend the guest address for use in the host address. */ + tcg_out_ext32u(s, TCG_REG_R0, addrlo); + h->index = TCG_REG_R0; + } else { + h->index = addrlo; + } + return ldst; } @@ -3905,7 +3920,8 @@ static void tcg_target_init(TCGContext *s) #if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ #endif - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1); tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2); if (USE_REG_TB) { From b41b43a4773c64c40b563c4da49285487f1ca1d8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 4 Apr 2023 03:04:49 +0300 Subject: [PATCH 35/53] tcg/ppc: Adjust constraints on qemu_ld/st MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The softmmu tlb uses TCG_REG_{TMP1,TMP2,R0}, not any of the normally available registers. Now that we handle overlap betwen inputs and helper arguments, we can allow any allocatable reg. Reviewed-by: Alex Bennée Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target-con-set.h | 11 ++++------- tcg/ppc/tcg-target-con-str.h | 2 -- tcg/ppc/tcg-target.c.inc | 32 ++++++++++---------------------- 3 files changed, 14 insertions(+), 31 deletions(-) diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h index a1a345883d..f206b29205 100644 --- a/tcg/ppc/tcg-target-con-set.h +++ b/tcg/ppc/tcg-target-con-set.h @@ -12,18 +12,15 @@ C_O0_I1(r) C_O0_I2(r, r) C_O0_I2(r, ri) -C_O0_I2(S, S) C_O0_I2(v, r) -C_O0_I3(S, S, S) +C_O0_I3(r, r, r) C_O0_I4(r, r, ri, ri) -C_O0_I4(S, S, S, S) -C_O1_I1(r, L) +C_O0_I4(r, r, r, r) C_O1_I1(r, r) C_O1_I1(v, r) C_O1_I1(v, v) C_O1_I1(v, vr) C_O1_I2(r, 0, rZ) -C_O1_I2(r, L, L) C_O1_I2(r, rI, ri) C_O1_I2(r, rI, rT) C_O1_I2(r, r, r) @@ -36,7 +33,7 @@ C_O1_I2(v, v, v) C_O1_I3(v, v, v, v) C_O1_I4(r, r, ri, rZ, rZ) C_O1_I4(r, r, r, ri, ri) -C_O2_I1(L, L, L) -C_O2_I2(L, L, L, L) +C_O2_I1(r, r, r) +C_O2_I2(r, r, r, r) C_O2_I4(r, r, rI, rZM, r, r) C_O2_I4(r, r, r, r, rI, rZM) diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h index 298ca20d5b..f3bf030bc3 100644 --- a/tcg/ppc/tcg-target-con-str.h +++ b/tcg/ppc/tcg-target-con-str.h @@ -14,8 +14,6 @@ REGS('A', 1u << TCG_REG_R3) REGS('B', 1u << TCG_REG_R4) REGS('C', 1u << TCG_REG_R5) REGS('D', 1u << TCG_REG_R6) -REGS('L', ALL_QLOAD_REGS) -REGS('S', ALL_QSTORE_REGS) /* * Define constraint letters for constants: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index dbba304e42..fa016c02ee 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -93,18 +93,6 @@ #define ALL_GENERAL_REGS 0xffffffffu #define ALL_VECTOR_REGS 0xffffffff00000000ull -#ifdef CONFIG_SOFTMMU -#define ALL_QLOAD_REGS \ - (ALL_GENERAL_REGS & \ - ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5))) -#define ALL_QSTORE_REGS \ - (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \ - (1 << TCG_REG_R5) | (1 << TCG_REG_R6))) -#else -#define ALL_QLOAD_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3)) -#define ALL_QSTORE_REGS ALL_QLOAD_REGS -#endif - TCGPowerISA have_isa; static bool 
have_isel; bool have_altivec; @@ -3754,23 +3742,23 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_qemu_ld_i32: return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32 - ? C_O1_I1(r, L) - : C_O1_I2(r, L, L)); + ? C_O1_I1(r, r) + : C_O1_I2(r, r, r)); case INDEX_op_qemu_st_i32: return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32 - ? C_O0_I2(S, S) - : C_O0_I3(S, S, S)); + ? C_O0_I2(r, r) + : C_O0_I3(r, r, r)); case INDEX_op_qemu_ld_i64: - return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) - : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L) - : C_O2_I2(L, L, L, L)); + return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) + : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r) + : C_O2_I2(r, r, r, r)); case INDEX_op_qemu_st_i64: - return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S) - : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S) - : C_O0_I4(S, S, S, S)); + return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) + : TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r) + : C_O0_I4(r, r, r, r)); case INDEX_op_add_vec: case INDEX_op_sub_vec: From 6e21aa2dcdcb647a13ef32180e6574fb702db4c3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 7 Apr 2023 14:24:08 -0700 Subject: [PATCH 36/53] tcg/ppc: Remove unused constraints A, B, C, D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These constraints have not been used for quite some time. Fixes: 77b73de67632 ("Use rem/div[u]_i32 drop div[u]2_i32") Reviewed-by: Daniel Henrique Barboza Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target-con-str.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h index f3bf030bc3..9dcbc3df50 100644 --- a/tcg/ppc/tcg-target-con-str.h +++ b/tcg/ppc/tcg-target-con-str.h @@ -10,10 +10,6 @@ */ REGS('r', ALL_GENERAL_REGS) REGS('v', ALL_VECTOR_REGS) -REGS('A', 1u << TCG_REG_R3) -REGS('B', 1u << TCG_REG_R4) -REGS('C', 1u << TCG_REG_R5) -REGS('D', 1u << TCG_REG_R6) /* * Define constraint letters for constants: From 3dedb7201c292d340ac73fb0e52179e3690fb0c8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 1 May 2023 08:32:09 +0100 Subject: [PATCH 37/53] tcg/ppc: Remove unused constraint J MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Never used since its introduction. 
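
A constraint letter only has effect when some constraint set in
tcg_target_op_def references it; 'I', for example, is reachable via
sets such as

    C_O1_I2(r, rI, ri)

whereas no set ever spelled 'J', leaving its TCG_CT_CONST_U16 test
dead as well.
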
Fixes: 3d582c6179c ("tcg-ppc64: Rearrange integer constant constraints") Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target-con-str.h | 1 - tcg/ppc/tcg-target.c.inc | 3 --- 2 files changed, 4 deletions(-) diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h index 9dcbc3df50..094613cbcb 100644 --- a/tcg/ppc/tcg-target-con-str.h +++ b/tcg/ppc/tcg-target-con-str.h @@ -16,7 +16,6 @@ REGS('v', ALL_VECTOR_REGS) * CONST(letter, TCG_CT_CONST_* bit set) */ CONST('I', TCG_CT_CONST_S16) -CONST('J', TCG_CT_CONST_U16) CONST('M', TCG_CT_CONST_MONE) CONST('T', TCG_CT_CONST_S32) CONST('U', TCG_CT_CONST_U32) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index fa016c02ee..29bfbfcc61 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -83,7 +83,6 @@ #define SZR (TCG_TARGET_REG_BITS / 8) #define TCG_CT_CONST_S16 0x100 -#define TCG_CT_CONST_U16 0x200 #define TCG_CT_CONST_S32 0x400 #define TCG_CT_CONST_U32 0x800 #define TCG_CT_CONST_ZERO 0x1000 @@ -270,8 +269,6 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { return 1; - } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { - return 1; } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { return 1; } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { From f0f43534f7f5beb92788951da6944faad154c6a2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 3 Apr 2023 19:47:55 +0000 Subject: [PATCH 38/53] tcg/riscv: Simplify constraints on qemu_ld/st MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The softmmu tlb uses TCG_REG_TMP[0-2], not any of the normally available registers. Now that we handle overlap betwen inputs and helper arguments, we can allow any allocatable reg. Reviewed-by: Alex Bennée Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target-con-set.h | 2 -- tcg/riscv/tcg-target-con-str.h | 1 - tcg/riscv/tcg-target.c.inc | 16 +++------------- 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index d4cff673b0..d88888d3ac 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -10,10 +10,8 @@ * tcg-target-con-str.h; the constraint combination is inclusive or. */ C_O0_I1(r) -C_O0_I2(LZ, L) C_O0_I2(rZ, r) C_O0_I2(rZ, rZ) -C_O1_I1(r, L) C_O1_I1(r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h index 8d8afaee53..6f1cfb976c 100644 --- a/tcg/riscv/tcg-target-con-str.h +++ b/tcg/riscv/tcg-target-con-str.h @@ -9,7 +9,6 @@ * REGS(letter, register_mask) */ REGS('r', ALL_GENERAL_REGS) -REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) /* * Define constraint letters for constants: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index c22d1e35ac..d12b824d8c 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -125,17 +125,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define TCG_CT_CONST_N12 0x400 #define TCG_CT_CONST_M12 0x800 -#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) -/* - * For softmmu, we need to avoid conflicts with the first 5 - * argument registers to call the helper. Some of these are - * also used for the tlb lookup. 
- */ -#ifdef CONFIG_SOFTMMU -#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5) -#else -#define SOFTMMU_RESERVE_REGS 0 -#endif +#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) #define sextreg sextract64 @@ -1600,10 +1590,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: - return C_O1_I1(r, L); + return C_O1_I1(r, r); case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i64: - return C_O0_I2(LZ, L); + return C_O0_I2(rZ, r); default: g_assert_not_reached(); From 8b1b45971ff6a2d98d2737279fbbc4173e0dbe8c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 3 Apr 2023 06:14:53 +0000 Subject: [PATCH 39/53] tcg/s390x: Use ALGFR in constructing softmmu host address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than zero-extend the guest address into a register, use an add instruction which zero-extends the second input. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target.c.inc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index dfcf4d9e34..dd13326670 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -149,6 +149,7 @@ typedef enum S390Opcode { RRE_ALGR = 0xb90a, RRE_ALCR = 0xb998, RRE_ALCGR = 0xb988, + RRE_ALGFR = 0xb91a, RRE_CGR = 0xb920, RRE_CLGR = 0xb921, RRE_DLGR = 0xb987, @@ -1853,10 +1854,11 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE, offsetof(CPUTLBEntry, addend)); - h->base = addr_reg; if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_R3, addr_reg); - h->base = TCG_REG_R3; + tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg); + h->base = TCG_REG_NONE; + } else { + h->base = addr_reg; } h->disp = 0; #else From 94901422840c2a33e7889fd87540e65bc9028283 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 7 Apr 2023 14:16:12 -0700 Subject: [PATCH 40/53] tcg/s390x: Simplify constraints on qemu_ld/st MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust the softmmu tlb to use R0+R1, not any of the normally available registers. Since we handle overlap betwen inputs and helper arguments, we can allow any allocatable reg. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target-con-set.h | 2 -- tcg/s390x/tcg-target-con-str.h | 1 - tcg/s390x/tcg-target.c.inc | 36 ++++++++++++---------------------- 3 files changed, 12 insertions(+), 27 deletions(-) diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 15f1c55103..ecc079bb6d 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -10,12 +10,10 @@ * tcg-target-con-str.h; the constraint combination is inclusive or. 
*/ C_O0_I1(r) -C_O0_I2(L, L) C_O0_I2(r, r) C_O0_I2(r, ri) C_O0_I2(r, rA) C_O0_I2(v, r) -C_O1_I1(r, L) C_O1_I1(r, r) C_O1_I1(v, r) C_O1_I1(v, v) diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h index 6fa64a1ed6..25675b449e 100644 --- a/tcg/s390x/tcg-target-con-str.h +++ b/tcg/s390x/tcg-target-con-str.h @@ -9,7 +9,6 @@ * REGS(letter, register_mask) */ REGS('r', ALL_GENERAL_REGS) -REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) REGS('v', ALL_VECTOR_REGS) REGS('o', 0xaaaa) /* odd numbered general regs */ diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index dd13326670..aacbaf21d5 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -44,18 +44,6 @@ #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) -/* - * For softmmu, we need to avoid conflicts with the first 3 - * argument registers to perform the tlb lookup, and to call - * the helper function. - */ -#ifdef CONFIG_SOFTMMU -#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3) -#else -#define SOFTMMU_RESERVE_REGS 0 -#endif - - /* Several places within the instruction set 0 means "no register" rather than TCG_REG_R0. */ #define TCG_REG_NONE 0 @@ -1814,13 +1802,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, ldst->oi = oi; ldst->addrlo_reg = addr_reg; - tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, + tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19)); - tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off); - tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off); + tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off); + tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off); /* * For aligned accesses, we check the first byte and include the alignment @@ -1830,10 +1818,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, a_off = (a_bits >= s_bits ? 
0 : s_mask - a_mask); tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; if (a_off == 0) { - tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); + tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask); } else { - tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); - tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); + tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R0, tlb_mask); } if (is_ld) { @@ -1842,16 +1830,16 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, ofs = offsetof(CPUTLBEntry, addr_write); } if (TARGET_LONG_BITS == 32) { - tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs); + tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs); } else { - tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs); + tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs); } tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); ldst->label_ptr[0] = s->code_ptr++; - h->index = TCG_REG_R2; - tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE, + h->index = TCG_TMP0; + tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE, offsetof(CPUTLBEntry, addend)); if (TARGET_LONG_BITS == 32) { @@ -3155,10 +3143,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: - return C_O1_I1(r, L); + return C_O1_I1(r, r); case INDEX_op_qemu_st_i64: case INDEX_op_qemu_st_i32: - return C_O0_I2(L, L); + return C_O0_I2(r, r); case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: From 3ec02c1f0f539ea117817484f4a352010edaf9e2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 May 2023 10:01:56 +0100 Subject: [PATCH 41/53] target/mips: Add MO_ALIGN to gen_llwp, gen_scwp These are atomic operations, so mark as requiring alignment. Signed-off-by: Richard Henderson --- target/mips/tcg/nanomips_translate.c.inc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc index 97b9572caa..e08343414c 100644 --- a/target/mips/tcg/nanomips_translate.c.inc +++ b/target/mips/tcg/nanomips_translate.c.inc @@ -998,7 +998,7 @@ static void gen_llwp(DisasContext *ctx, uint32_t base, int16_t offset, TCGv tmp2 = tcg_temp_new(); gen_base_offset_addr(ctx, taddr, base, offset); - tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ | MO_ALIGN); if (cpu_is_bigendian(ctx)) { tcg_gen_extr_i64_tl(tmp2, tmp1, tval); } else { @@ -1039,7 +1039,8 @@ static void gen_scwp(DisasContext *ctx, uint32_t base, int16_t offset, tcg_gen_ld_i64(llval, cpu_env, offsetof(CPUMIPSState, llval_wp)); tcg_gen_atomic_cmpxchg_i64(val, taddr, llval, tval, - eva ? MIPS_HFLAG_UM : ctx->mem_idx, MO_64); + eva ? MIPS_HFLAG_UM : ctx->mem_idx, + MO_64 | MO_ALIGN); if (reg1 != 0) { tcg_gen_movi_tl(cpu_gpr[reg1], 1); } From 0d5bede4680c88a02213c3cc13dd7a74453a71b5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 May 2023 10:22:23 +0100 Subject: [PATCH 42/53] target/mips: Add missing default_tcg_memop_mask Memory operations that are not already aligned, or otherwise marked up, require addition of ctx->default_tcg_memop_mask. 
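
ctx->default_tcg_memop_mask is MO_ALIGN for ISAs that take an address
error exception on misaligned access, and MO_UNALN for those (such as
MIPSr6) that handle misalignment in hardware.  The recurring fix is
therefore of the form:

    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
                       MO_TESL | ctx->default_tcg_memop_mask);

Without the mask, a misaligned pre-R6 access would simply succeed on
a lenient host instead of raising the guest exception.
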
Signed-off-by: Richard Henderson --- target/mips/tcg/micromips_translate.c.inc | 24 ++++++++++++++-------- target/mips/tcg/mips16e_translate.c.inc | 18 ++++++++++------ target/mips/tcg/mxu_translate.c | 3 ++- target/mips/tcg/nanomips_translate.c.inc | 25 +++++++++++------------ 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/target/mips/tcg/micromips_translate.c.inc b/target/mips/tcg/micromips_translate.c.inc index e8b193aeda..211d102cf6 100644 --- a/target/mips/tcg/micromips_translate.c.inc +++ b/target/mips/tcg/micromips_translate.c.inc @@ -977,20 +977,24 @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd, gen_reserved_instruction(ctx); return; } - tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | + ctx->default_tcg_memop_mask); gen_store_gpr(t1, rd); tcg_gen_movi_tl(t1, 4); gen_op_addr_add(ctx, t0, t0, t1); - tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | + ctx->default_tcg_memop_mask); gen_store_gpr(t1, rd + 1); break; case SWP: gen_load_gpr(t1, rd); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); tcg_gen_movi_tl(t1, 4); gen_op_addr_add(ctx, t0, t0, t1); gen_load_gpr(t1, rd + 1); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); break; #ifdef TARGET_MIPS64 case LDP: @@ -998,20 +1002,24 @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd, gen_reserved_instruction(ctx); return; } - tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ | + ctx->default_tcg_memop_mask); gen_store_gpr(t1, rd); tcg_gen_movi_tl(t1, 8); gen_op_addr_add(ctx, t0, t0, t1); - tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ | + ctx->default_tcg_memop_mask); gen_store_gpr(t1, rd + 1); break; case SDP: gen_load_gpr(t1, rd); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ | + ctx->default_tcg_memop_mask); tcg_gen_movi_tl(t1, 8); gen_op_addr_add(ctx, t0, t0, t1); gen_load_gpr(t1, rd + 1); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ | + ctx->default_tcg_memop_mask); break; #endif } diff --git a/target/mips/tcg/mips16e_translate.c.inc b/target/mips/tcg/mips16e_translate.c.inc index 602f5f0c02..5cffe0e412 100644 --- a/target/mips/tcg/mips16e_translate.c.inc +++ b/target/mips/tcg/mips16e_translate.c.inc @@ -172,22 +172,26 @@ static void gen_mips16_save(DisasContext *ctx, case 4: gen_base_offset_addr(ctx, t0, 29, 12); gen_load_gpr(t1, 7); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); /* Fall through */ case 3: gen_base_offset_addr(ctx, t0, 29, 8); gen_load_gpr(t1, 6); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); /* Fall through */ case 2: gen_base_offset_addr(ctx, t0, 29, 4); gen_load_gpr(t1, 5); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); /* Fall through */ case 1: gen_base_offset_addr(ctx, t0, 29, 0); gen_load_gpr(t1, 4); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, 
t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); } gen_load_gpr(t0, 29); @@ -196,7 +200,8 @@ static void gen_mips16_save(DisasContext *ctx, tcg_gen_movi_tl(t2, -4); \ gen_op_addr_add(ctx, t0, t0, t2); \ gen_load_gpr(t1, reg); \ - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); \ + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | \ + ctx->default_tcg_memop_mask); \ } while (0) if (do_ra) { @@ -298,7 +303,8 @@ static void gen_mips16_restore(DisasContext *ctx, #define DECR_AND_LOAD(reg) do { \ tcg_gen_movi_tl(t2, -4); \ gen_op_addr_add(ctx, t0, t0, t2); \ - tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); \ + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | \ + ctx->default_tcg_memop_mask); \ gen_store_gpr(t1, reg); \ } while (0) diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c index bdd20709c0..be038b5f07 100644 --- a/target/mips/tcg/mxu_translate.c +++ b/target/mips/tcg/mxu_translate.c @@ -831,7 +831,8 @@ static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx) tcg_gen_ori_tl(t1, t1, 0xFFFFF000); } tcg_gen_add_tl(t1, t0, t1); - tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_TESL ^ (sel * MO_BSWAP)); + tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) | + ctx->default_tcg_memop_mask); gen_store_mxu_gpr(t1, XRa); } diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc index e08343414c..b96dcd2ae9 100644 --- a/target/mips/tcg/nanomips_translate.c.inc +++ b/target/mips/tcg/nanomips_translate.c.inc @@ -2641,52 +2641,49 @@ static void gen_p_lsx(DisasContext *ctx, int rd, int rs, int rt) switch (extract32(ctx->opcode, 7, 4)) { case NM_LBX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, - MO_SB); + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB); gen_store_gpr(t0, rd); break; case NM_LHX: /*case NM_LHXS:*/ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, - MO_TESW); + MO_TESW | ctx->default_tcg_memop_mask); gen_store_gpr(t0, rd); break; case NM_LWX: /*case NM_LWXS:*/ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, - MO_TESL); + MO_TESL | ctx->default_tcg_memop_mask); gen_store_gpr(t0, rd); break; case NM_LBUX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, - MO_UB); + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_UB); gen_store_gpr(t0, rd); break; case NM_LHUX: /*case NM_LHUXS:*/ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, - MO_TEUW); + MO_TEUW | ctx->default_tcg_memop_mask); gen_store_gpr(t0, rd); break; case NM_SBX: check_nms(ctx); gen_load_gpr(t1, rd); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, - MO_8); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_8); break; case NM_SHX: /*case NM_SHXS:*/ check_nms(ctx); gen_load_gpr(t1, rd); tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, - MO_TEUW); + MO_TEUW | ctx->default_tcg_memop_mask); break; case NM_SWX: /*case NM_SWXS:*/ check_nms(ctx); gen_load_gpr(t1, rd); tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, - MO_TEUL); + MO_TEUL | ctx->default_tcg_memop_mask); break; case NM_LWC1X: /*case NM_LWC1XS:*/ @@ -3739,7 +3736,8 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) addr_off); tcg_gen_movi_tl(t0, addr); - tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx, MO_TESL); + tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx, + MO_TESL | ctx->default_tcg_memop_mask); } break; case NM_SWPC48: @@ -3755,7 +3753,8 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) tcg_gen_movi_tl(t0, addr); gen_load_gpr(t1, rt); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, + MO_TEUL | ctx->default_tcg_memop_mask); 
} break; default: From fc49723769dca4edf1348e1eec96ad4b810260ba Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 May 2023 10:23:34 +0100 Subject: [PATCH 43/53] target/mips: Use MO_ALIGN instead of 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The opposite of MO_UNALN is MO_ALIGN. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/mips/tcg/nanomips_translate.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc index b96dcd2ae9..a98dde0d2e 100644 --- a/target/mips/tcg/nanomips_translate.c.inc +++ b/target/mips/tcg/nanomips_translate.c.inc @@ -4305,7 +4305,7 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) TCGv va = tcg_temp_new(); TCGv t1 = tcg_temp_new(); MemOp memop = (extract32(ctx->opcode, 8, 3)) == - NM_P_LS_UAWM ? MO_UNALN : 0; + NM_P_LS_UAWM ? MO_UNALN : MO_ALIGN; count = (count == 0) ? 8 : count; while (counter != count) { From 0e85e81b42e52e6e584aebe98c5b187653a577c1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 May 2023 10:32:13 +0100 Subject: [PATCH 44/53] target/mips: Remove TARGET_ALIGNED_ONLY Signed-off-by: Richard Henderson --- configs/targets/mips-linux-user.mak | 1 - configs/targets/mips-softmmu.mak | 1 - configs/targets/mips64-linux-user.mak | 1 - configs/targets/mips64-softmmu.mak | 1 - configs/targets/mips64el-linux-user.mak | 1 - configs/targets/mips64el-softmmu.mak | 1 - configs/targets/mipsel-linux-user.mak | 1 - configs/targets/mipsel-softmmu.mak | 1 - configs/targets/mipsn32-linux-user.mak | 1 - configs/targets/mipsn32el-linux-user.mak | 1 - 10 files changed, 10 deletions(-) diff --git a/configs/targets/mips-linux-user.mak b/configs/targets/mips-linux-user.mak index 71fa77d464..b4569a9893 100644 --- a/configs/targets/mips-linux-user.mak +++ b/configs/targets/mips-linux-user.mak @@ -2,5 +2,4 @@ TARGET_ARCH=mips TARGET_ABI_MIPSO32=y TARGET_SYSTBL_ABI=o32 TARGET_SYSTBL=syscall_o32.tbl -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y diff --git a/configs/targets/mips-softmmu.mak b/configs/targets/mips-softmmu.mak index 7787a4d94c..d34b4083fc 100644 --- a/configs/targets/mips-softmmu.mak +++ b/configs/targets/mips-softmmu.mak @@ -1,4 +1,3 @@ TARGET_ARCH=mips -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y TARGET_SUPPORTS_MTTCG=y diff --git a/configs/targets/mips64-linux-user.mak b/configs/targets/mips64-linux-user.mak index 5a4771f22d..d2ff509a11 100644 --- a/configs/targets/mips64-linux-user.mak +++ b/configs/targets/mips64-linux-user.mak @@ -3,5 +3,4 @@ TARGET_ABI_MIPSN64=y TARGET_BASE_ARCH=mips TARGET_SYSTBL_ABI=n64 TARGET_SYSTBL=syscall_n64.tbl -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y diff --git a/configs/targets/mips64-softmmu.mak b/configs/targets/mips64-softmmu.mak index 568d66650c..12d9483bf0 100644 --- a/configs/targets/mips64-softmmu.mak +++ b/configs/targets/mips64-softmmu.mak @@ -1,4 +1,3 @@ TARGET_ARCH=mips64 TARGET_BASE_ARCH=mips -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y diff --git a/configs/targets/mips64el-linux-user.mak b/configs/targets/mips64el-linux-user.mak index f348f35997..f9efeec8ea 100644 --- a/configs/targets/mips64el-linux-user.mak +++ b/configs/targets/mips64el-linux-user.mak @@ -3,4 +3,3 @@ TARGET_ABI_MIPSN64=y TARGET_BASE_ARCH=mips TARGET_SYSTBL_ABI=n64 TARGET_SYSTBL=syscall_n64.tbl -TARGET_ALIGNED_ONLY=y diff --git a/configs/targets/mips64el-softmmu.mak b/configs/targets/mips64el-softmmu.mak index 
5a52aa4b64..8d9ab3ddc4 100644 --- a/configs/targets/mips64el-softmmu.mak +++ b/configs/targets/mips64el-softmmu.mak @@ -1,4 +1,3 @@ TARGET_ARCH=mips64 TARGET_BASE_ARCH=mips -TARGET_ALIGNED_ONLY=y TARGET_NEED_FDT=y diff --git a/configs/targets/mipsel-linux-user.mak b/configs/targets/mipsel-linux-user.mak index e23793070c..e8d7241d31 100644 --- a/configs/targets/mipsel-linux-user.mak +++ b/configs/targets/mipsel-linux-user.mak @@ -2,4 +2,3 @@ TARGET_ARCH=mips TARGET_ABI_MIPSO32=y TARGET_SYSTBL_ABI=o32 TARGET_SYSTBL=syscall_o32.tbl -TARGET_ALIGNED_ONLY=y diff --git a/configs/targets/mipsel-softmmu.mak b/configs/targets/mipsel-softmmu.mak index c7c41f4fb7..0829659fc2 100644 --- a/configs/targets/mipsel-softmmu.mak +++ b/configs/targets/mipsel-softmmu.mak @@ -1,3 +1,2 @@ TARGET_ARCH=mips -TARGET_ALIGNED_ONLY=y TARGET_SUPPORTS_MTTCG=y diff --git a/configs/targets/mipsn32-linux-user.mak b/configs/targets/mipsn32-linux-user.mak index 1e80b302fc..206095da64 100644 --- a/configs/targets/mipsn32-linux-user.mak +++ b/configs/targets/mipsn32-linux-user.mak @@ -4,5 +4,4 @@ TARGET_ABI32=y TARGET_BASE_ARCH=mips TARGET_SYSTBL_ABI=n32 TARGET_SYSTBL=syscall_n32.tbl -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y diff --git a/configs/targets/mipsn32el-linux-user.mak b/configs/targets/mipsn32el-linux-user.mak index f31a9c394b..ca2a3ed753 100644 --- a/configs/targets/mipsn32el-linux-user.mak +++ b/configs/targets/mipsn32el-linux-user.mak @@ -4,4 +4,3 @@ TARGET_ABI32=y TARGET_BASE_ARCH=mips TARGET_SYSTBL_ABI=n32 TARGET_SYSTBL=syscall_n32.tbl -TARGET_ALIGNED_ONLY=y From a978c37b275a6f7fb21d5bf6d46dca56b96ec7b0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 May 2023 10:38:52 +0100 Subject: [PATCH 45/53] target/nios2: Remove TARGET_ALIGNED_ONLY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In gen_ldx/gen_stx, the only two locations for memory operations, mark the operation as either aligned (softmmu) or unaligned (user-only, as if emulated by the kernel). 
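For illustration, the same choice could live in one hypothetical helper
rather than being open-coded at the two sites the diff below touches:

    /* Hypothetical sketch, not part of this patch. */
    static inline MemOp nios2_memop_alignment(MemOp mop)
    {
    #ifdef CONFIG_USER_ONLY
        return mop | MO_UNALN;  /* let it through; a kernel would emulate it */
    #else
        return mop | MO_ALIGN;  /* softmmu raises the unaligned-access fault */
    #endif
    }

Either way, every MemOp handed to tcg_gen_qemu_ld_tl/tcg_gen_qemu_st_tl
now carries an explicit alignment marking instead of relying on the
TARGET_ALIGNED_ONLY default.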
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- configs/targets/nios2-softmmu.mak | 1 - target/nios2/translate.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/configs/targets/nios2-softmmu.mak b/configs/targets/nios2-softmmu.mak index 5823fc02c8..c99ae3777e 100644 --- a/configs/targets/nios2-softmmu.mak +++ b/configs/targets/nios2-softmmu.mak @@ -1,3 +1,2 @@ TARGET_ARCH=nios2 -TARGET_ALIGNED_ONLY=y TARGET_NEED_FDT=y diff --git a/target/nios2/translate.c b/target/nios2/translate.c index 6610e22236..a548e16ed5 100644 --- a/target/nios2/translate.c +++ b/target/nios2/translate.c @@ -298,6 +298,11 @@ static void gen_ldx(DisasContext *dc, uint32_t code, uint32_t flags) TCGv data = dest_gpr(dc, instr.b); tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s); +#ifdef CONFIG_USER_ONLY + flags |= MO_UNALN; +#else + flags |= MO_ALIGN; +#endif tcg_gen_qemu_ld_tl(data, addr, dc->mem_idx, flags); } @@ -309,6 +314,11 @@ static void gen_stx(DisasContext *dc, uint32_t code, uint32_t flags) TCGv addr = tcg_temp_new(); tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s); +#ifdef CONFIG_USER_ONLY + flags |= MO_UNALN; +#else + flags |= MO_ALIGN; +#endif tcg_gen_qemu_st_tl(val, addr, dc->mem_idx, flags); } From 03a0d87e8ddc2577151b02d5b93c577ac48b133a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 May 2023 10:50:35 +0100 Subject: [PATCH 46/53] target/sh4: Use MO_ALIGN where required MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark all memory operations that are not already marked with UNALIGN. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sh4/translate.c | 102 ++++++++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 36 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 6e40d5dd6a..0dedbb8210 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -527,13 +527,15 @@ static void _decode_opc(DisasContext * ctx) case 0x9000: /* mov.w @(disp,PC),Rn */ { TCGv addr = tcg_constant_i32(ctx->base.pc_next + 4 + B7_0 * 2); - tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW); + tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, + MO_TESW | MO_ALIGN); } return; case 0xd000: /* mov.l @(disp,PC),Rn */ { TCGv addr = tcg_constant_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3); - tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, + MO_TESL | MO_ALIGN); } return; case 0x7000: /* add #imm,Rn */ @@ -801,9 +803,11 @@ static void _decode_opc(DisasContext * ctx) { TCGv arg0, arg1; arg0 = tcg_temp_new(); - tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, + MO_TESL | MO_ALIGN); arg1 = tcg_temp_new(); - tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); gen_helper_macl(cpu_env, arg0, arg1); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); @@ -813,9 +817,11 @@ static void _decode_opc(DisasContext * ctx) { TCGv arg0, arg1; arg0 = tcg_temp_new(); - tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, + MO_TESL | MO_ALIGN); arg1 = tcg_temp_new(); - tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); gen_helper_macw(cpu_env, 
arg0, arg1); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2); @@ -961,30 +967,36 @@ static void _decode_opc(DisasContext * ctx) if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); gen_load_fpr64(ctx, fp, XHACK(B7_4)); - tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEUQ); + tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, + MO_TEUQ | MO_ALIGN); } else { - tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, + MO_TEUL | MO_ALIGN); } return; case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ); + tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, + MO_TEUQ | MO_ALIGN); gen_store_fpr64(ctx, fp, XHACK(B11_8)); } else { - tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, + MO_TEUL | MO_ALIGN); } return; case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ); + tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, + MO_TEUQ | MO_ALIGN); gen_store_fpr64(ctx, fp, XHACK(B11_8)); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8); } else { - tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, + MO_TEUL | MO_ALIGN); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4); } return; @@ -996,10 +1008,12 @@ static void _decode_opc(DisasContext * ctx) TCGv_i64 fp = tcg_temp_new_i64(); gen_load_fpr64(ctx, fp, XHACK(B7_4)); tcg_gen_subi_i32(addr, REG(B11_8), 8); - tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ); + tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, + MO_TEUQ | MO_ALIGN); } else { tcg_gen_subi_i32(addr, REG(B11_8), 4); - tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, + MO_TEUL | MO_ALIGN); } tcg_gen_mov_i32(REG(B11_8), addr); } @@ -1011,10 +1025,12 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_add_i32(addr, REG(B7_4), REG(0)); if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEUQ); + tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, + MO_TEUQ | MO_ALIGN); gen_store_fpr64(ctx, fp, XHACK(B11_8)); } else { - tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, + MO_TEUL | MO_ALIGN); } } return; @@ -1026,9 +1042,11 @@ static void _decode_opc(DisasContext * ctx) if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); gen_load_fpr64(ctx, fp, XHACK(B7_4)); - tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ); + tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, + MO_TEUQ | MO_ALIGN); } else { - tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, + MO_TEUL | MO_ALIGN); } } return; @@ -1158,14 +1176,14 @@ static void _decode_opc(DisasContext * ctx) { TCGv addr = tcg_temp_new(); tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2); - tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW); + tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW | MO_ALIGN); } return; case 0xc600: /* mov.l @(disp,GBR),R0 */ { TCGv addr = tcg_temp_new(); tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4); - tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, 
MO_TESL); + tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL | MO_ALIGN); } return; case 0xc000: /* mov.b R0,@(disp,GBR) */ @@ -1179,14 +1197,14 @@ static void _decode_opc(DisasContext * ctx) { TCGv addr = tcg_temp_new(); tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2); - tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW); + tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW | MO_ALIGN); } return; case 0xc200: /* mov.l R0,@(disp,GBR) */ { TCGv addr = tcg_temp_new(); tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4); - tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL | MO_ALIGN); } return; case 0x8000: /* mov.b R0,@(disp,Rn) */ @@ -1286,7 +1304,8 @@ static void _decode_opc(DisasContext * ctx) return; case 0x4087: /* ldc.l @Rm+,Rn_BANK */ CHECK_PRIVILEGED - tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); return; case 0x0082: /* stc Rm_BANK,Rn */ @@ -1298,7 +1317,8 @@ static void _decode_opc(DisasContext * ctx) { TCGv addr = tcg_temp_new(); tcg_gen_subi_i32(addr, REG(B11_8), 4); - tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, + MO_TEUL | MO_ALIGN); tcg_gen_mov_i32(REG(B11_8), addr); } return; @@ -1354,7 +1374,8 @@ static void _decode_opc(DisasContext * ctx) CHECK_PRIVILEGED { TCGv val = tcg_temp_new(); - tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); tcg_gen_andi_i32(val, val, 0x700083f3); gen_write_sr(val); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); @@ -1372,7 +1393,7 @@ static void _decode_opc(DisasContext * ctx) TCGv val = tcg_temp_new(); tcg_gen_subi_i32(addr, REG(B11_8), 4); gen_read_sr(val); - tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN); tcg_gen_mov_i32(REG(B11_8), addr); } return; @@ -1383,7 +1404,8 @@ static void _decode_opc(DisasContext * ctx) return; \ case ldpnum: \ prechk \ - tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, MO_TESL); \ + tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, \ + MO_TESL | MO_ALIGN); \ tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); \ return; #define ST(reg,stnum,stpnum,prechk) \ @@ -1396,7 +1418,8 @@ static void _decode_opc(DisasContext * ctx) { \ TCGv addr = tcg_temp_new(); \ tcg_gen_subi_i32(addr, REG(B11_8), 4); \ - tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, MO_TEUL); \ + tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, \ + MO_TEUL | MO_ALIGN); \ tcg_gen_mov_i32(REG(B11_8), addr); \ } \ return; @@ -1423,7 +1446,8 @@ static void _decode_opc(DisasContext * ctx) CHECK_FPU_ENABLED { TCGv addr = tcg_temp_new(); - tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); gen_helper_ld_fpscr(cpu_env, addr); ctx->base.is_jmp = DISAS_STOP; @@ -1441,16 +1465,18 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_andi_i32(val, cpu_fpscr, 0x003fffff); addr = tcg_temp_new(); tcg_gen_subi_i32(addr, REG(B11_8), 4); - tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN); tcg_gen_mov_i32(REG(B11_8), addr); } return; case 0x00c3: /* movca.l R0,@Rm */ { TCGv val = tcg_temp_new(); - tcg_gen_qemu_ld_i32(val, 
REG(B11_8), ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, + MO_TEUL | MO_ALIGN); gen_helper_movcal(cpu_env, REG(B11_8), val); - tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TEUL | MO_ALIGN); } ctx->has_movcal = 1; return; @@ -1492,11 +1518,13 @@ static void _decode_opc(DisasContext * ctx) cpu_lock_addr, fail); tmp = tcg_temp_new(); tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value, - REG(0), ctx->memidx, MO_TEUL); + REG(0), ctx->memidx, + MO_TEUL | MO_ALIGN); tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value); } else { tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail); - tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TEUL | MO_ALIGN); tcg_gen_movi_i32(cpu_sr_t, 1); } tcg_gen_br(done); @@ -1521,11 +1549,13 @@ static void _decode_opc(DisasContext * ctx) if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) { TCGv tmp = tcg_temp_new(); tcg_gen_mov_i32(tmp, REG(B11_8)); - tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); tcg_gen_mov_i32(cpu_lock_value, REG(0)); tcg_gen_mov_i32(cpu_lock_addr, tmp); } else { - tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); tcg_gen_movi_i32(cpu_lock_addr, 0); } return; From 8244189419f9bf290b03a5993401d722533de972 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 May 2023 11:18:14 +0100 Subject: [PATCH 47/53] target/sh4: Remove TARGET_ALIGNED_ONLY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- configs/targets/sh4-linux-user.mak | 1 - configs/targets/sh4-softmmu.mak | 1 - configs/targets/sh4eb-linux-user.mak | 1 - configs/targets/sh4eb-softmmu.mak | 1 - 4 files changed, 4 deletions(-) diff --git a/configs/targets/sh4-linux-user.mak b/configs/targets/sh4-linux-user.mak index 0152d6621e..9908887566 100644 --- a/configs/targets/sh4-linux-user.mak +++ b/configs/targets/sh4-linux-user.mak @@ -1,5 +1,4 @@ TARGET_ARCH=sh4 TARGET_SYSTBL_ABI=common TARGET_SYSTBL=syscall.tbl -TARGET_ALIGNED_ONLY=y TARGET_HAS_BFLT=y diff --git a/configs/targets/sh4-softmmu.mak b/configs/targets/sh4-softmmu.mak index 95896376c4..f9d62d91e4 100644 --- a/configs/targets/sh4-softmmu.mak +++ b/configs/targets/sh4-softmmu.mak @@ -1,2 +1 @@ TARGET_ARCH=sh4 -TARGET_ALIGNED_ONLY=y diff --git a/configs/targets/sh4eb-linux-user.mak b/configs/targets/sh4eb-linux-user.mak index 6724165efe..9db6b3609c 100644 --- a/configs/targets/sh4eb-linux-user.mak +++ b/configs/targets/sh4eb-linux-user.mak @@ -1,6 +1,5 @@ TARGET_ARCH=sh4 TARGET_SYSTBL_ABI=common TARGET_SYSTBL=syscall.tbl -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y TARGET_HAS_BFLT=y diff --git a/configs/targets/sh4eb-softmmu.mak b/configs/targets/sh4eb-softmmu.mak index dc8b30bf7a..226b1fc698 100644 --- a/configs/targets/sh4eb-softmmu.mak +++ b/configs/targets/sh4eb-softmmu.mak @@ -1,3 +1,2 @@ TARGET_ARCH=sh4 -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y From 1fceff9c3ca5960748f7346b3cd75e9d2620eaed Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 May 2023 16:25:02 +0100 Subject: [PATCH 48/53] tcg: Remove TARGET_ALIGNED_ONLY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All uses have now been 
expunged. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/exec/memop.h | 13 ++----------- include/exec/poison.h | 1 - tcg/tcg.c | 5 ----- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/include/exec/memop.h b/include/exec/memop.h index 25d027434a..07f5f88188 100644 --- a/include/exec/memop.h +++ b/include/exec/memop.h @@ -47,8 +47,6 @@ typedef enum MemOp { * MO_UNALN accesses are never checked for alignment. * MO_ALIGN accesses will result in a call to the CPU's * do_unaligned_access hook if the guest address is not aligned. - * The default depends on whether the target CPU defines - * TARGET_ALIGNED_ONLY. * * Some architectures (e.g. ARMv8) need the address which is aligned * to a size more than the size of the memory access. @@ -65,21 +63,14 @@ typedef enum MemOp { */ MO_ASHIFT = 5, MO_AMASK = 0x7 << MO_ASHIFT, -#ifdef NEED_CPU_H -#ifdef TARGET_ALIGNED_ONLY - MO_ALIGN = 0, - MO_UNALN = MO_AMASK, -#else - MO_ALIGN = MO_AMASK, - MO_UNALN = 0, -#endif -#endif + MO_UNALN = 0, MO_ALIGN_2 = 1 << MO_ASHIFT, MO_ALIGN_4 = 2 << MO_ASHIFT, MO_ALIGN_8 = 3 << MO_ASHIFT, MO_ALIGN_16 = 4 << MO_ASHIFT, MO_ALIGN_32 = 5 << MO_ASHIFT, MO_ALIGN_64 = 6 << MO_ASHIFT, + MO_ALIGN = MO_AMASK, /* Combinations of the above, for ease of use. */ MO_UB = MO_8, diff --git a/include/exec/poison.h b/include/exec/poison.h index 140daa4a85..256736e11a 100644 --- a/include/exec/poison.h +++ b/include/exec/poison.h @@ -35,7 +35,6 @@ #pragma GCC poison TARGET_TRICORE #pragma GCC poison TARGET_XTENSA -#pragma GCC poison TARGET_ALIGNED_ONLY #pragma GCC poison TARGET_HAS_BFLT #pragma GCC poison TARGET_NAME #pragma GCC poison TARGET_SUPPORTS_MTTCG diff --git a/tcg/tcg.c b/tcg/tcg.c index 88fe01f59f..1231c8ab4c 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2185,13 +2185,8 @@ static const char * const ldst_name[] = }; static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { -#ifdef TARGET_ALIGNED_ONLY [MO_UNALN >> MO_ASHIFT] = "un+", - [MO_ALIGN >> MO_ASHIFT] = "", -#else - [MO_UNALN >> MO_ASHIFT] = "", [MO_ALIGN >> MO_ASHIFT] = "al+", -#endif [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", From 9877ea05de9cdce6a5da87175d8455832f8148dc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Oct 2022 14:58:09 +1000 Subject: [PATCH 49/53] accel/tcg: Add cpu_in_serial_context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like cpu_in_exclusive_context, but also true if there is no other cpu against which we could race. Use it in tb_flush as a direct replacement. Use it in cpu_loop_exit_atomic to ensure that there is no loop against cpu_exec_step_atomic. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec-common.c | 3 +++ accel/tcg/internal.h | 9 +++++++++ accel/tcg/tb-maint.c | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c index e7962c9348..9a5fabf625 100644 --- a/accel/tcg/cpu-exec-common.c +++ b/accel/tcg/cpu-exec-common.c @@ -22,6 +22,7 @@ #include "sysemu/tcg.h" #include "exec/exec-all.h" #include "qemu/plugin.h" +#include "internal.h" bool tcg_allowed; @@ -81,6 +82,8 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc) { + /* Prevent looping if already executing in a serial context. 
*/ + g_assert(!cpu_in_serial_context(cpu)); cpu->exception_index = EXCP_ATOMIC; cpu_loop_exit_restore(cpu, pc); } diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index 7bb0fdbe14..24f225cac7 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -64,6 +64,15 @@ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb) } } +/* + * Return true if CS is not running in parallel with other cpus, either + * because there are no other cpus or we are within an exclusive context. + */ +static inline bool cpu_in_serial_context(CPUState *cs) +{ + return !(cs->tcg_cflags & CF_PARALLEL) || cpu_in_exclusive_context(cs); +} + extern int64_t max_delay; extern int64_t max_advance; diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index 0dd173fbf0..991746f80f 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -760,7 +760,7 @@ void tb_flush(CPUState *cpu) if (tcg_enabled()) { unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count); - if (cpu_in_exclusive_context(cpu)) { + if (cpu_in_serial_context(cpu)) { do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count)); } else { async_safe_run_on_cpu(cpu, do_tb_flush, From 0b3c75ad1a21574cc55b0c095a7dc21e2d27ffc8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 5 May 2023 21:55:01 +0100 Subject: [PATCH 50/53] accel/tcg: Introduce tlb_read_idx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of playing with offsetof in various places, use MMUAccessType to index an array. This is easily defined instead of the previous dummy padding array in the union. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 104 +++++++++++++--------------------------- include/exec/cpu-defs.h | 7 ++- include/exec/cpu_ldst.h | 26 ++++++++-- 3 files changed, 59 insertions(+), 78 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 0b8a5f93d2..5051244c67 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1441,34 +1441,17 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, } } -static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) -{ -#if TCG_OVERSIZED_GUEST - return *(target_ulong *)((uintptr_t)entry + ofs); -#else - /* ofs might correspond to .addr_write, so use qatomic_read */ - return qatomic_read((target_ulong *)((uintptr_t)entry + ofs)); -#endif -} - /* Return true if ADDR is present in the victim tlb, and has been copied back to the main tlb. */ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, - size_t elt_ofs, target_ulong page) + MMUAccessType access_type, target_ulong page) { size_t vidx; assert_cpu_is_self(env_cpu(env)); for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; - target_ulong cmp; - - /* elt_ofs might correspond to .addr_write, so use qatomic_read */ -#if TCG_OVERSIZED_GUEST - cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); -#else - cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); -#endif + target_ulong cmp = tlb_read_idx(vtlb, access_type); if (cmp == page) { /* Found entry in victim tlb, swap tlb and iotlb. */ @@ -1490,11 +1473,6 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, return false; } -/* Macro to call the above, with local variables from the use context. 
*/ -#define VICTIM_TLB_HIT(TY, ADDR) \ - victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ - (ADDR) & TARGET_PAGE_MASK) - static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, CPUTLBEntryFull *full, uintptr_t retaddr) { @@ -1527,29 +1505,12 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, { uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr, page_addr; - size_t elt_ofs; - int flags; + target_ulong tlb_addr = tlb_read_idx(entry, access_type); + target_ulong page_addr = addr & TARGET_PAGE_MASK; + int flags = TLB_FLAGS_MASK; - switch (access_type) { - case MMU_DATA_LOAD: - elt_ofs = offsetof(CPUTLBEntry, addr_read); - break; - case MMU_DATA_STORE: - elt_ofs = offsetof(CPUTLBEntry, addr_write); - break; - case MMU_INST_FETCH: - elt_ofs = offsetof(CPUTLBEntry, addr_code); - break; - default: - g_assert_not_reached(); - } - tlb_addr = tlb_read_ofs(entry, elt_ofs); - - flags = TLB_FLAGS_MASK; - page_addr = addr & TARGET_PAGE_MASK; if (!tlb_hit_page(tlb_addr, page_addr)) { - if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, access_type, page_addr)) { CPUState *cs = env_cpu(env); if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type, @@ -1571,7 +1532,7 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, */ flags &= ~TLB_INVALID_MASK; } - tlb_addr = tlb_read_ofs(entry, elt_ofs); + tlb_addr = tlb_read_idx(entry, access_type); } flags &= tlb_addr; @@ -1802,7 +1763,8 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, if (prot & PAGE_WRITE) { tlb_addr = tlb_addr_write(tlbe); if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE, + addr & TARGET_PAGE_MASK)) { tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, mmu_idx, retaddr); index = tlb_index(env, mmu_idx, addr); @@ -1835,7 +1797,8 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, } else /* if (prot & PAGE_READ) */ { tlb_addr = tlbe->addr_read; if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_read, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_LOAD, + addr & TARGET_PAGE_MASK)) { tlb_fill(env_cpu(env), addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); index = tlb_index(env, mmu_idx, addr); @@ -1929,13 +1892,9 @@ load_memop(const void *haddr, MemOp op) static inline uint64_t QEMU_ALWAYS_INLINE load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi, - uintptr_t retaddr, MemOp op, bool code_read, + uintptr_t retaddr, MemOp op, MMUAccessType access_type, FullLoadHelper *full_load) { - const size_t tlb_off = code_read ? - offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); - const MMUAccessType access_type = - code_read ? MMU_INST_FETCH : MMU_DATA_LOAD; const unsigned a_bits = get_alignment_bits(get_memop(oi)); const size_t size = memop_size(op); uintptr_t mmu_idx = get_mmuidx(oi); @@ -1955,18 +1914,18 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi, index = tlb_index(env, mmu_idx, addr); entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = code_read ? entry->addr_code : entry->addr_read; + tlb_addr = tlb_read_idx(entry, access_type); /* If the TLB entry is for a different page, reload and try again. 
*/ if (!tlb_hit(tlb_addr, addr)) { - if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, + if (!victim_tlb_hit(env, mmu_idx, index, access_type, addr & TARGET_PAGE_MASK)) { tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr); index = tlb_index(env, mmu_idx, addr); entry = tlb_entry(env, mmu_idx, addr); } - tlb_addr = code_read ? entry->addr_code : entry->addr_read; + tlb_addr = tlb_read_idx(entry, access_type); tlb_addr &= ~TLB_INVALID_MASK; } @@ -2052,7 +2011,8 @@ static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_UB); - return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); + return load_helper(env, addr, oi, retaddr, MO_UB, MMU_DATA_LOAD, + full_ldub_mmu); } tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, @@ -2065,7 +2025,7 @@ static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_LEUW); - return load_helper(env, addr, oi, retaddr, MO_LEUW, false, + return load_helper(env, addr, oi, retaddr, MO_LEUW, MMU_DATA_LOAD, full_le_lduw_mmu); } @@ -2079,7 +2039,7 @@ static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_BEUW); - return load_helper(env, addr, oi, retaddr, MO_BEUW, false, + return load_helper(env, addr, oi, retaddr, MO_BEUW, MMU_DATA_LOAD, full_be_lduw_mmu); } @@ -2093,7 +2053,7 @@ static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_LEUL); - return load_helper(env, addr, oi, retaddr, MO_LEUL, false, + return load_helper(env, addr, oi, retaddr, MO_LEUL, MMU_DATA_LOAD, full_le_ldul_mmu); } @@ -2107,7 +2067,7 @@ static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_BEUL); - return load_helper(env, addr, oi, retaddr, MO_BEUL, false, + return load_helper(env, addr, oi, retaddr, MO_BEUL, MMU_DATA_LOAD, full_be_ldul_mmu); } @@ -2121,7 +2081,7 @@ uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_LEUQ); - return load_helper(env, addr, oi, retaddr, MO_LEUQ, false, + return load_helper(env, addr, oi, retaddr, MO_LEUQ, MMU_DATA_LOAD, helper_le_ldq_mmu); } @@ -2129,7 +2089,7 @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_BEUQ); - return load_helper(env, addr, oi, retaddr, MO_BEUQ, false, + return load_helper(env, addr, oi, retaddr, MO_BEUQ, MMU_DATA_LOAD, helper_be_ldq_mmu); } @@ -2325,7 +2285,6 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, uintptr_t retaddr, size_t size, uintptr_t mmu_idx, bool big_endian) { - const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); uintptr_t index, index2; CPUTLBEntry *entry, *entry2; target_ulong page1, page2, tlb_addr, tlb_addr2; @@ -2347,7 +2306,7 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, tlb_addr2 = tlb_addr_write(entry2); if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) { - if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) { + if (!victim_tlb_hit(env, mmu_idx, index2, MMU_DATA_STORE, page2)) { tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE, mmu_idx, retaddr); index2 = tlb_index(env, mmu_idx, page2); @@ -2400,7 +2359,6 @@ static inline void QEMU_ALWAYS_INLINE store_helper(CPUArchState *env, target_ulong 
addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr, MemOp op) { - const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); const unsigned a_bits = get_alignment_bits(get_memop(oi)); const size_t size = memop_size(op); uintptr_t mmu_idx = get_mmuidx(oi); @@ -2423,7 +2381,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, /* If the TLB entry is for a different page, reload and try again. */ if (!tlb_hit(tlb_addr, addr)) { - if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, + if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE, addr & TARGET_PAGE_MASK)) { tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, mmu_idx, retaddr); @@ -2729,7 +2687,8 @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val, static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { - return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code); + return load_helper(env, addr, oi, retaddr, MO_8, + MMU_INST_FETCH, full_ldub_code); } uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) @@ -2741,7 +2700,8 @@ uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { - return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code); + return load_helper(env, addr, oi, retaddr, MO_TEUW, + MMU_INST_FETCH, full_lduw_code); } uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) @@ -2753,7 +2713,8 @@ uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { - return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code); + return load_helper(env, addr, oi, retaddr, MO_TEUL, + MMU_INST_FETCH, full_ldl_code); } uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) @@ -2765,7 +2726,8 @@ uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { - return load_helper(env, addr, oi, retaddr, MO_TEUQ, true, full_ldq_code); + return load_helper(env, addr, oi, retaddr, MO_TEUQ, + MMU_INST_FETCH, full_ldq_code); } uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index e1c498ef4b..a6e0cf1812 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -111,8 +111,11 @@ typedef struct CPUTLBEntry { use the corresponding iotlb value. */ uintptr_t addend; }; - /* padding to get a power of two size */ - uint8_t dummy[1 << CPU_TLB_ENTRY_BITS]; + /* + * Padding to get a power of two size, as well as index + * access to addr_{read,write,code}. + */ + target_ulong addr_idx[(1 << CPU_TLB_ENTRY_BITS) / TARGET_LONG_SIZE]; }; } CPUTLBEntry; diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index c141f0394f..7c867c94c3 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -360,13 +360,29 @@ static inline void clear_helper_retaddr(void) /* Needed for TCG_OVERSIZED_GUEST */ #include "tcg/tcg.h" +static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry, + MMUAccessType access_type) +{ + /* Do not rearrange the CPUTLBEntry structure members. 
*/
+    QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
+                      MMU_DATA_LOAD * TARGET_LONG_SIZE);
+    QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
+                      MMU_DATA_STORE * TARGET_LONG_SIZE);
+    QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
+                      MMU_INST_FETCH * TARGET_LONG_SIZE);
+
+    const target_ulong *ptr = &entry->addr_idx[access_type];
+#if TCG_OVERSIZED_GUEST
+    return *ptr;
+#else
+    /* ofs might correspond to .addr_write, so use qatomic_read */
+    return qatomic_read(ptr);
+#endif
+}
+
 static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
 {
-#if TCG_OVERSIZED_GUEST
-    return entry->addr_write;
-#else
-    return qatomic_read(&entry->addr_write);
-#endif
+    return tlb_read_idx(entry, MMU_DATA_STORE);
 }
 
 /* Find the TLB index corresponding to the mmu_idx + address pair. */

From 8cfdacaa1642ed18e48935f7536f8d233db8efcc Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sat, 29 Oct 2022 08:40:51 +1100
Subject: [PATCH 51/53] accel/tcg: Reorg system mode load helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of trying to unify all operations on uint64_t, pull out
mmu_lookup() to perform the basic tlb hit and resolution.
Create individual functions to handle access by size.

Reviewed-by: Alex Bennée
Reviewed-by: Peter Maydell
Signed-off-by: Richard Henderson
---
 accel/tcg/cputlb.c | 645 +++++++++++++++++++++++++++++----------------
 1 file changed, 424 insertions(+), 221 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 5051244c67..a85edd8246 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1716,6 +1716,179 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
 
 #endif
 
+/*
+ * Probe for a load/store operation.
+ * Return the host address in @haddr and the TLB flags in @flags.
+ */
+
+typedef struct MMULookupPageData {
+    CPUTLBEntryFull *full;
+    void *haddr;
+    target_ulong addr;
+    int flags;
+    int size;
+} MMULookupPageData;
+
+typedef struct MMULookupLocals {
+    MMULookupPageData page[2];
+    MemOp memop;
+    int mmu_idx;
+} MMULookupLocals;
+
+/**
+ * mmu_lookup1: translate one page
+ * @env: cpu context
+ * @data: lookup parameters
+ * @mmu_idx: virtual address context
+ * @access_type: load/store/code
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Resolve the translation for the one page at @data.addr, filling in
+ * the rest of @data with the results.  If the translation fails,
+ * tlb_fill will longjmp out.  Return true if the softmmu tlb for
+ * @mmu_idx may have been resized.
+ */
+static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data,
+                        int mmu_idx, MMUAccessType access_type, uintptr_t ra)
+{
+    target_ulong addr = data->addr;
+    uintptr_t index = tlb_index(env, mmu_idx, addr);
+    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+    target_ulong tlb_addr = tlb_read_idx(entry, access_type);
+    bool maybe_resized = false;
+
+    /* If the TLB entry is for a different page, reload and try again. */
+    if (!tlb_hit(tlb_addr, addr)) {
+        if (!victim_tlb_hit(env, mmu_idx, index, access_type,
+                            addr & TARGET_PAGE_MASK)) {
+            tlb_fill(env_cpu(env), addr, data->size, access_type, mmu_idx, ra);
+            maybe_resized = true;
+            index = tlb_index(env, mmu_idx, addr);
+            entry = tlb_entry(env, mmu_idx, addr);
+        }
+        tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK;
+    }
+
+    data->flags = tlb_addr & TLB_FLAGS_MASK;
+    data->full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+    /* Compute haddr speculatively; depending on flags it might be invalid.
*/ + data->haddr = (void *)((uintptr_t)addr + entry->addend); + + return maybe_resized; +} + +/** + * mmu_watch_or_dirty + * @env: cpu context + * @data: lookup parameters + * @access_type: load/store/code + * @ra: return address into tcg generated code, or 0 + * + * Trigger watchpoints for @data.addr:@data.size; + * record writes to protected clean pages. + */ +static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data, + MMUAccessType access_type, uintptr_t ra) +{ + CPUTLBEntryFull *full = data->full; + target_ulong addr = data->addr; + int flags = data->flags; + int size = data->size; + + /* On watchpoint hit, this will longjmp out. */ + if (flags & TLB_WATCHPOINT) { + int wp = access_type == MMU_DATA_STORE ? BP_MEM_WRITE : BP_MEM_READ; + cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs, wp, ra); + flags &= ~TLB_WATCHPOINT; + } + + /* Note that notdirty is only set for writes. */ + if (flags & TLB_NOTDIRTY) { + notdirty_write(env_cpu(env), addr, size, full, ra); + flags &= ~TLB_NOTDIRTY; + } + data->flags = flags; +} + +/** + * mmu_lookup: translate page(s) + * @env: cpu context + * @addr: virtual address + * @oi: combined mmu_idx and MemOp + * @ra: return address into tcg generated code, or 0 + * @access_type: load/store/code + * @l: output result + * + * Resolve the translation for the page(s) beginning at @addr, for MemOp.size + * bytes. Return true if the lookup crosses a page boundary. + */ +static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi, + uintptr_t ra, MMUAccessType type, MMULookupLocals *l) +{ + unsigned a_bits; + bool crosspage; + int flags; + + l->memop = get_memop(oi); + l->mmu_idx = get_mmuidx(oi); + + tcg_debug_assert(l->mmu_idx < NB_MMU_MODES); + + /* Handle CPU specific unaligned behaviour */ + a_bits = get_alignment_bits(l->memop); + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(env_cpu(env), addr, type, l->mmu_idx, ra); + } + + l->page[0].addr = addr; + l->page[0].size = memop_size(l->memop); + l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK; + l->page[1].size = 0; + crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK; + + if (likely(!crosspage)) { + mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra); + + flags = l->page[0].flags; + if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) { + mmu_watch_or_dirty(env, &l->page[0], type, ra); + } + if (unlikely(flags & TLB_BSWAP)) { + l->memop ^= MO_BSWAP; + } + } else { + /* Finish compute of page crossing. */ + int size0 = l->page[1].addr - addr; + l->page[1].size = l->page[0].size - size0; + l->page[0].size = size0; + + /* + * Lookup both pages, recognizing exceptions from either. If the + * second lookup potentially resized, refresh first CPUTLBEntryFull. + */ + mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra); + if (mmu_lookup1(env, &l->page[1], l->mmu_idx, type, ra)) { + uintptr_t index = tlb_index(env, l->mmu_idx, addr); + l->page[0].full = &env_tlb(env)->d[l->mmu_idx].fulltlb[index]; + } + + flags = l->page[0].flags | l->page[1].flags; + if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) { + mmu_watch_or_dirty(env, &l->page[0], type, ra); + mmu_watch_or_dirty(env, &l->page[1], type, ra); + } + + /* + * Since target/sparc is the only user of TLB_BSWAP, and all + * Sparc accesses are aligned, any treatment across two pages + * would be arbitrary. Refuse it until there's a use. + */ + tcg_debug_assert((flags & TLB_BSWAP) == 0); + } + + return crosspage; +} + /* * Probe for an atomic operation. 
Do not allow unaligned operations, * or io operations to proceed. Return the host address. @@ -1890,113 +2063,6 @@ load_memop(const void *haddr, MemOp op) } } -static inline uint64_t QEMU_ALWAYS_INLINE -load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi, - uintptr_t retaddr, MemOp op, MMUAccessType access_type, - FullLoadHelper *full_load) -{ - const unsigned a_bits = get_alignment_bits(get_memop(oi)); - const size_t size = memop_size(op); - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index; - CPUTLBEntry *entry; - target_ulong tlb_addr; - void *haddr; - uint64_t res; - - tcg_debug_assert(mmu_idx < NB_MMU_MODES); - - /* Handle CPU specific unaligned behaviour */ - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(env_cpu(env), addr, access_type, - mmu_idx, retaddr); - } - - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = tlb_read_idx(entry, access_type); - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!victim_tlb_hit(env, mmu_idx, index, access_type, - addr & TARGET_PAGE_MASK)) { - tlb_fill(env_cpu(env), addr, size, - access_type, mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = tlb_read_idx(entry, access_type); - tlb_addr &= ~TLB_INVALID_MASK; - } - - /* Handle anything that isn't just a straight memory access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUTLBEntryFull *full; - bool need_swap; - - /* For anything that is unaligned, recurse through full_load. */ - if ((addr & (size - 1)) != 0) { - goto do_unaligned_access; - } - - full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; - - /* Handle watchpoints. */ - if (unlikely(tlb_addr & TLB_WATCHPOINT)) { - /* On watchpoint hit, this will longjmp out. */ - cpu_check_watchpoint(env_cpu(env), addr, size, - full->attrs, BP_MEM_READ, retaddr); - } - - need_swap = size > 1 && (tlb_addr & TLB_BSWAP); - - /* Handle I/O access. */ - if (likely(tlb_addr & TLB_MMIO)) { - return io_readx(env, full, mmu_idx, addr, retaddr, - access_type, op ^ (need_swap * MO_BSWAP)); - } - - haddr = (void *)((uintptr_t)addr + entry->addend); - - /* - * Keep these two load_memop separate to ensure that the compiler - * is able to fold the entire function to a single instruction. - * There is a build-time assert inside to remind you of this. ;-) - */ - if (unlikely(need_swap)) { - return load_memop(haddr, op ^ MO_BSWAP); - } - return load_memop(haddr, op); - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (size > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 - >= TARGET_PAGE_SIZE)) { - target_ulong addr1, addr2; - uint64_t r1, r2; - unsigned shift; - do_unaligned_access: - addr1 = addr & ~((target_ulong)size - 1); - addr2 = addr1 + size; - r1 = full_load(env, addr1, oi, retaddr); - r2 = full_load(env, addr2, oi, retaddr); - shift = (addr & (size - 1)) * 8; - - if (memop_big_endian(op)) { - /* Big-endian combine. */ - res = (r1 << shift) | (r2 >> ((size * 8) - shift)); - } else { - /* Little-endian combine. 
*/
-            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
-        }
-        return res & MAKE_64BIT_MASK(0, size * 8);
-    }
-
-    haddr = (void *)((uintptr_t)addr + entry->addend);
-    return load_memop(haddr, op);
-}
-
 /*
  * For the benefit of TCG generated code, we want to avoid the
  * complication of ABI-specific return type promotion and always
@@ -2007,90 +2073,250 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
  * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
  */
 
-static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
-                              MemOpIdx oi, uintptr_t retaddr)
+/**
+ * do_ld_mmio_beN:
+ * @env: cpu context
+ * @p: translation parameters
+ * @ret_be: accumulated data
+ * @mmu_idx: virtual address context
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Load @p->size bytes from @p->addr, which is memory-mapped i/o.
+ * The bytes are concatenated in big-endian order with @ret_be.
+ */
+static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p,
+                               uint64_t ret_be, int mmu_idx,
+                               MMUAccessType type, uintptr_t ra)
 {
-    validate_memop(oi, MO_UB);
-    return load_helper(env, addr, oi, retaddr, MO_UB, MMU_DATA_LOAD,
-                       full_ldub_mmu);
+    CPUTLBEntryFull *full = p->full;
+    target_ulong addr = p->addr;
+    int i, size = p->size;
+
+    QEMU_IOTHREAD_LOCK_GUARD();
+    for (i = 0; i < size; i++) {
+        uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB);
+        ret_be = (ret_be << 8) | x;
+    }
+    return ret_be;
+}
+
+/**
+ * do_ld_bytes_beN
+ * @p: translation parameters
+ * @ret_be: accumulated data
+ *
+ * Load @p->size bytes from @p->haddr, which is RAM.
+ * The bytes are concatenated in big-endian order with @ret_be.
+ */
+static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
+{
+    uint8_t *haddr = p->haddr;
+    int i, size = p->size;
+
+    for (i = 0; i < size; i++) {
+        ret_be = (ret_be << 8) | haddr[i];
+    }
+    return ret_be;
+}
+
+/*
+ * Wrapper for the above.
+ */
+static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
+                          uint64_t ret_be, int mmu_idx,
+                          MMUAccessType type, uintptr_t ra)
+{
+    if (unlikely(p->flags & TLB_MMIO)) {
+        return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
+    } else {
+        return do_ld_bytes_beN(p, ret_be);
+    }
+}
+
+static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+                       MMUAccessType type, uintptr_t ra)
+{
+    if (unlikely(p->flags & TLB_MMIO)) {
+        return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB);
+    } else {
+        return *(uint8_t *)p->haddr;
+    }
+}
+
+static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+                        MMUAccessType type, MemOp memop, uintptr_t ra)
+{
+    uint64_t ret;
+
+    if (unlikely(p->flags & TLB_MMIO)) {
+        return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
+    }
+
+    /* Perform the load host endian, then swap if necessary. */
+    ret = load_memop(p->haddr, MO_UW);
+    if (memop & MO_BSWAP) {
+        ret = bswap16(ret);
+    }
+    return ret;
+}
+
+static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+                        MMUAccessType type, MemOp memop, uintptr_t ra)
+{
+    uint32_t ret;
+
+    if (unlikely(p->flags & TLB_MMIO)) {
+        return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
+    }
+
+    /* Perform the load host endian.
+static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+                        MMUAccessType type, MemOp memop, uintptr_t ra)
+{
+    uint32_t ret;
+
+    if (unlikely(p->flags & TLB_MMIO)) {
+        return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
+    }
+
+    /* Perform the load host endian. */
+    ret = load_memop(p->haddr, MO_UL);
+    if (memop & MO_BSWAP) {
+        ret = bswap32(ret);
+    }
+    return ret;
+}
+
+static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+                        MMUAccessType type, MemOp memop, uintptr_t ra)
+{
+    uint64_t ret;
+
+    if (unlikely(p->flags & TLB_MMIO)) {
+        return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
+    }
+
+    /* Perform the load host endian. */
+    ret = load_memop(p->haddr, MO_UQ);
+    if (memop & MO_BSWAP) {
+        ret = bswap64(ret);
+    }
+    return ret;
+}
+
+static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+                          uintptr_t ra, MMUAccessType access_type)
+{
+    MMULookupLocals l;
+    bool crosspage;
+
+    crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+    tcg_debug_assert(!crosspage);
+
+    return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
 }
 
 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
                                      MemOpIdx oi, uintptr_t retaddr)
 {
-    return full_ldub_mmu(env, addr, oi, retaddr);
+    validate_memop(oi, MO_UB);
+    return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
 }
 
-static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi, uintptr_t retaddr)
+static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+                           uintptr_t ra, MMUAccessType access_type)
 {
-    validate_memop(oi, MO_LEUW);
-    return load_helper(env, addr, oi, retaddr, MO_LEUW, MMU_DATA_LOAD,
-                       full_le_lduw_mmu);
+    MMULookupLocals l;
+    bool crosspage;
+    uint16_t ret;
+    uint8_t a, b;
+
+    crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+    if (likely(!crosspage)) {
+        return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
+    }
+
+    a = do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
+    b = do_ld_1(env, &l.page[1], l.mmu_idx, access_type, ra);
+
+    if ((l.memop & MO_BSWAP) == MO_LE) {
+        ret = a | (b << 8);
+    } else {
+        ret = b | (a << 8);
+    }
+    return ret;
 }
 
 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                     MemOpIdx oi, uintptr_t retaddr)
 {
-    return full_le_lduw_mmu(env, addr, oi, retaddr);
-}
-
-static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi, uintptr_t retaddr)
-{
-    validate_memop(oi, MO_BEUW);
-    return load_helper(env, addr, oi, retaddr, MO_BEUW, MMU_DATA_LOAD,
-                       full_be_lduw_mmu);
+    validate_memop(oi, MO_LEUW);
+    return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
 }
 
 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                     MemOpIdx oi, uintptr_t retaddr)
 {
-    return full_be_lduw_mmu(env, addr, oi, retaddr);
+    validate_memop(oi, MO_BEUW);
+    return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
 }
 
-static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi, uintptr_t retaddr)
+static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+                           uintptr_t ra, MMUAccessType access_type)
 {
-    validate_memop(oi, MO_LEUL);
-    return load_helper(env, addr, oi, retaddr, MO_LEUL, MMU_DATA_LOAD,
-                       full_le_ldul_mmu);
+    MMULookupLocals l;
+    bool crosspage;
+    uint32_t ret;
+
+    crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+    if (likely(!crosspage)) {
+        return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
+    }
+
+    ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
+    ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
+    if ((l.memop & MO_BSWAP) == MO_LE) {
+        ret = bswap32(ret);
+    }
+    return ret;
 }
 
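(Aside: for the crosspage 16-bit case in do_ld2_mmu above, the memop decides
which single byte lands in the low half.  A tiny standalone check of both
recombines -- illustrative values, not QEMU code:)

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t a = 0x12, b = 0x34;   /* a from page[0], b from page[1] */
        uint16_t le = a | (b << 8);   /* little-endian: a is the low byte */
        uint16_t be = b | (a << 8);   /* big-endian: a is the high byte */
        printf("le=0x%04x be=0x%04x\n", le, be);  /* le=0x3412 be=0x1234 */
        return 0;
    }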
 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                     MemOpIdx oi, uintptr_t retaddr)
 {
-    return full_le_ldul_mmu(env, addr, oi, retaddr);
-}
-
-static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi, uintptr_t retaddr)
-{
-    validate_memop(oi, MO_BEUL);
-    return load_helper(env, addr, oi, retaddr, MO_BEUL, MMU_DATA_LOAD,
-                       full_be_ldul_mmu);
+    validate_memop(oi, MO_LEUL);
+    return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
 }
 
 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                     MemOpIdx oi, uintptr_t retaddr)
 {
-    return full_be_ldul_mmu(env, addr, oi, retaddr);
+    validate_memop(oi, MO_BEUL);
+    return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
+}
+
+static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+                           uintptr_t ra, MMUAccessType access_type)
+{
+    MMULookupLocals l;
+    bool crosspage;
+    uint64_t ret;
+
+    crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+    if (likely(!crosspage)) {
+        return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
+    }
+
+    ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
+    ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
+    if ((l.memop & MO_BSWAP) == MO_LE) {
+        ret = bswap64(ret);
+    }
+    return ret;
 }
 
 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                            MemOpIdx oi, uintptr_t retaddr)
 {
     validate_memop(oi, MO_LEUQ);
-    return load_helper(env, addr, oi, retaddr, MO_LEUQ, MMU_DATA_LOAD,
-                       helper_le_ldq_mmu);
+    return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
 }
 
 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                            MemOpIdx oi, uintptr_t retaddr)
 {
     validate_memop(oi, MO_BEUQ);
-    return load_helper(env, addr, oi, retaddr, MO_BEUQ, MMU_DATA_LOAD,
-                       helper_be_ldq_mmu);
+    return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
 }
 
 /*
@@ -2133,56 +2359,85 @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
  * Load helpers for cpu_ldst.h.
  */
 
-static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
-                                       MemOpIdx oi, uintptr_t retaddr,
-                                       FullLoadHelper *full_load)
+static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
 {
-    uint64_t ret;
-
-    ret = full_load(env, addr, oi, retaddr);
     qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
-    return ret;
 }
 
 uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, full_ldub_mmu);
+    uint8_t ret;
+
+    validate_memop(oi, MO_UB);
+    ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+    plugin_load_cb(env, addr, oi);
+    return ret;
 }
 
 uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
                         MemOpIdx oi, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, full_be_lduw_mmu);
+    uint16_t ret;
+
+    validate_memop(oi, MO_BEUW);
+    ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+    plugin_load_cb(env, addr, oi);
+    return ret;
 }
 
 uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
                         MemOpIdx oi, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, full_be_ldul_mmu);
+    uint32_t ret;
+
+    validate_memop(oi, MO_BEUL);
+    ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+    plugin_load_cb(env, addr, oi);
+    return ret;
 }
 
 uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
                         MemOpIdx oi, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, helper_be_ldq_mmu);
+    uint64_t ret;
+
+    validate_memop(oi, MO_BEUQ);
+    ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+    plugin_load_cb(env, addr, oi);
+    return ret;
 }
 
 uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
                         MemOpIdx oi, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, full_le_lduw_mmu);
+    uint16_t ret;
+
+    validate_memop(oi, MO_LEUW);
+    ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+    plugin_load_cb(env, addr, oi);
+    return ret;
 }
 
 uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
                         MemOpIdx oi, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, full_le_ldul_mmu);
+    uint32_t ret;
+
+    validate_memop(oi, MO_LEUL);
+    ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+    plugin_load_cb(env, addr, oi);
+    return ret;
 }
 
 uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
                         MemOpIdx oi, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
+    uint64_t ret;
+
+    validate_memop(oi, MO_LEUQ);
+    ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+    plugin_load_cb(env, addr, oi);
+    return ret;
 }
 
 Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
@@ -2684,102 +2939,50 @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
 /* Code access functions.  */
-static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
-                               MemOpIdx oi, uintptr_t retaddr)
-{
-    return load_helper(env, addr, oi, retaddr, MO_8,
-                       MMU_INST_FETCH, full_ldub_code);
-}
-
 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
 {
     MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
-    return full_ldub_code(env, addr, oi, 0);
-}
-
-static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
-                               MemOpIdx oi, uintptr_t retaddr)
-{
-    return load_helper(env, addr, oi, retaddr, MO_TEUW,
-                       MMU_INST_FETCH, full_lduw_code);
+    return do_ld1_mmu(env, addr, oi, 0, MMU_INST_FETCH);
 }
 
 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
 {
     MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
-    return full_lduw_code(env, addr, oi, 0);
-}
-
-static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
-                              MemOpIdx oi, uintptr_t retaddr)
-{
-    return load_helper(env, addr, oi, retaddr, MO_TEUL,
-                       MMU_INST_FETCH, full_ldl_code);
+    return do_ld2_mmu(env, addr, oi, 0, MMU_INST_FETCH);
 }
 
 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
 {
     MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
-    return full_ldl_code(env, addr, oi, 0);
-}
-
-static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
-                              MemOpIdx oi, uintptr_t retaddr)
-{
-    return load_helper(env, addr, oi, retaddr, MO_TEUQ,
-                       MMU_INST_FETCH, full_ldq_code);
+    return do_ld4_mmu(env, addr, oi, 0, MMU_INST_FETCH);
 }
 
 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
 {
     MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
-    return full_ldq_code(env, addr, oi, 0);
+    return do_ld8_mmu(env, addr, oi, 0, MMU_INST_FETCH);
 }
 
 uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
                          MemOpIdx oi, uintptr_t retaddr)
 {
-    return full_ldub_code(env, addr, oi, retaddr);
+    return do_ld1_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
 }
 
 uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
                           MemOpIdx oi, uintptr_t retaddr)
 {
-    MemOp mop = get_memop(oi);
-    int idx = get_mmuidx(oi);
-    uint16_t ret;
-
-    ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
-    if ((mop & MO_BSWAP) != MO_TE) {
-        ret = bswap16(ret);
-    }
-    return ret;
+    return do_ld2_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
 }
 
 uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
                           MemOpIdx oi, uintptr_t retaddr)
 {
-    MemOp mop = get_memop(oi);
-    int idx = get_mmuidx(oi);
-    uint32_t ret;
-
-    ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
-    if ((mop & MO_BSWAP) != MO_TE) {
-        ret = bswap32(ret);
-    }
-    return ret;
+    return do_ld4_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
 }
 
 uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
                           MemOpIdx oi, uintptr_t retaddr)
 {
-    MemOp mop = get_memop(oi);
-    int idx = get_mmuidx(oi);
-    uint64_t ret;
-
-    ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
-    if ((mop & MO_BSWAP) != MO_TE) {
-        ret = bswap64(ret);
-    }
-    return ret;
+    return do_ld8_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
 }

From 592134617c98f37b8b39c6dd684e5a1832c071d2 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sun, 30 Oct 2022 12:07:32 +1100
Subject: [PATCH 52/53] accel/tcg: Reorg system mode store helpers

Instead of trying to unify all operations on uint64_t, use
mmu_lookup() to perform the basic tlb hit and resolution.
Create individual functions to handle access by size.
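(Aside: the control flow this reorganization aims for can be sketched
standalone.  Everything below is a hypothetical simplification -- mock_lookup,
PageData and the 16-byte page size are invented for illustration and
correspond only loosely to mmu_lookup() and MMULookupPageData:)

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    typedef struct {
        uint8_t *haddr;   /* host address of this page's slice */
        int size;         /* bytes covered on this page */
    } PageData;

    typedef struct {
        PageData page[2];
    } Locals;

    /* Pretend lookup: split an access at a 16-byte "page" boundary. */
    static bool mock_lookup(uint8_t *ram, unsigned addr, int size, Locals *l)
    {
        int first = 16 - (addr % 16);
        if (first >= size) {
            l->page[0] = (PageData){ram + addr, size};
            return false;                    /* no page crossing */
        }
        l->page[0] = (PageData){ram + addr, first};
        l->page[1] = (PageData){ram + addr + first, size - first};
        return true;
    }

    int main(void)
    {
        uint8_t ram[32] = {0};
        Locals l;
        uint32_t val = 0x11223344;           /* stored little-endian */

        if (!mock_lookup(ram, 14, 4, &l)) {
            memcpy(l.page[0].haddr, &val, 4);   /* fast path, one page;
                                                   assumes LE host */
        } else {
            uint64_t v = val;                   /* slow path, by bytes */
            for (int p = 0; p < 2; p++) {
                for (int i = 0; i < l.page[p].size; i++, v >>= 8) {
                    l.page[p].haddr[i] = v;
                }
            }
        }
        printf("%02x %02x %02x %02x\n", ram[14], ram[15], ram[16], ram[17]);
        return 0;                               /* prints: 44 33 22 11 */
    }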
Reviewed-by: Peter Maydell
Signed-off-by: Richard Henderson
---
 accel/tcg/cputlb.c | 408 +++++++++++++++++++++------------------------
 1 file changed, 193 insertions(+), 215 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index a85edd8246..617777055a 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2532,322 +2532,300 @@ store_memop(void *haddr, uint64_t val, MemOp op)
     }
 }
 
-static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                         MemOpIdx oi, uintptr_t retaddr);
-
-static void __attribute__((noinline))
-store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
-                       uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
-                       bool big_endian)
+/**
+ * do_st_mmio_leN:
+ * @env: cpu context
+ * @p: translation parameters
+ * @val_le: data to store
+ * @mmu_idx: virtual address context
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Store @p->size bytes at @p->addr, which is memory-mapped i/o.
+ * The bytes to store are extracted in little-endian order from @val_le;
+ * return the bytes of @val_le beyond @p->size that have not been stored.
+ */
+static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
+                               uint64_t val_le, int mmu_idx, uintptr_t ra)
 {
-    uintptr_t index, index2;
-    CPUTLBEntry *entry, *entry2;
-    target_ulong page1, page2, tlb_addr, tlb_addr2;
-    MemOpIdx oi;
-    size_t size2;
-    int i;
+    CPUTLBEntryFull *full = p->full;
+    target_ulong addr = p->addr;
+    int i, size = p->size;
 
-    /*
-     * Ensure the second page is in the TLB.  Note that the first page
-     * is already guaranteed to be filled, and that the second page
-     * cannot evict the first.  An exception to this rule is PAGE_WRITE_INV
-     * handling: the first page could have evicted itself.
-     */
-    page1 = addr & TARGET_PAGE_MASK;
-    page2 = (addr + size) & TARGET_PAGE_MASK;
-    size2 = (addr + size) & ~TARGET_PAGE_MASK;
-    index2 = tlb_index(env, mmu_idx, page2);
-    entry2 = tlb_entry(env, mmu_idx, page2);
-
-    tlb_addr2 = tlb_addr_write(entry2);
-    if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
-        if (!victim_tlb_hit(env, mmu_idx, index2, MMU_DATA_STORE, page2)) {
-            tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
-                     mmu_idx, retaddr);
-            index2 = tlb_index(env, mmu_idx, page2);
-            entry2 = tlb_entry(env, mmu_idx, page2);
-        }
-        tlb_addr2 = tlb_addr_write(entry2);
+    QEMU_IOTHREAD_LOCK_GUARD();
+    for (i = 0; i < size; i++, val_le >>= 8) {
+        io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
     }
+    return val_le;
+}
 
-    index = tlb_index(env, mmu_idx, addr);
-    entry = tlb_entry(env, mmu_idx, addr);
-    tlb_addr = tlb_addr_write(entry);
+/**
+ * do_st_bytes_leN:
+ * @p: translation parameters
+ * @val_le: data to store
+ *
+ * Store @p->size bytes at @p->haddr, which is RAM.
+ * The bytes to store are extracted in little-endian order from @val_le;
+ * return the bytes of @val_le beyond @p->size that have not been stored.
+ */
+static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
+{
+    uint8_t *haddr = p->haddr;
+    int i, size = p->size;
 
-    /*
-     * Handle watchpoints.  Since this may trap, all checks
-     * must happen before any store.
-     */
-    if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
-        cpu_check_watchpoint(env_cpu(env), addr, size - size2,
-                             env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
-                             BP_MEM_WRITE, retaddr);
-    }
-    if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
-        cpu_check_watchpoint(env_cpu(env), page2, size2,
-                             env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
-                             BP_MEM_WRITE, retaddr);
+    for (i = 0; i < size; i++, val_le >>= 8) {
+        haddr[i] = val_le;
     }
+    return val_le;
+}
 
-    /*
-     * XXX: not efficient, but simple.
-     * This loop must go in the forward direction to avoid issues
-     * with self-modifying code in Windows 64-bit.
-     */
-    oi = make_memop_idx(MO_UB, mmu_idx);
-    if (big_endian) {
-        for (i = 0; i < size; ++i) {
-            /* Big-endian extract. */
-            uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
-            full_stb_mmu(env, addr + i, val8, oi, retaddr);
-        }
+/*
+ * Wrapper for the above.
+ */
+static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
+                          uint64_t val_le, int mmu_idx, uintptr_t ra)
+{
+    if (unlikely(p->flags & TLB_MMIO)) {
+        return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
+    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+        return val_le >> (p->size * 8);
     } else {
-        for (i = 0; i < size; ++i) {
-            /* Little-endian extract. */
-            uint8_t val8 = val >> (i * 8);
-            full_stb_mmu(env, addr + i, val8, oi, retaddr);
-        }
+        return do_st_bytes_leN(p, val_le);
     }
 }
 
-static inline void QEMU_ALWAYS_INLINE
-store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
-             MemOpIdx oi, uintptr_t retaddr, MemOp op)
+static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
+                    int mmu_idx, uintptr_t ra)
 {
-    const unsigned a_bits = get_alignment_bits(get_memop(oi));
-    const size_t size = memop_size(op);
-    uintptr_t mmu_idx = get_mmuidx(oi);
-    uintptr_t index;
-    CPUTLBEntry *entry;
-    target_ulong tlb_addr;
-    void *haddr;
-
-    tcg_debug_assert(mmu_idx < NB_MMU_MODES);
-
-    /* Handle CPU specific unaligned behaviour */
-    if (addr & ((1 << a_bits) - 1)) {
-        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
-                             mmu_idx, retaddr);
+    if (unlikely(p->flags & TLB_MMIO)) {
+        io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
+    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+        /* nothing */
+    } else {
+        *(uint8_t *)p->haddr = val;
     }
-
-    index = tlb_index(env, mmu_idx, addr);
-    entry = tlb_entry(env, mmu_idx, addr);
-    tlb_addr = tlb_addr_write(entry);
-
-    /* If the TLB entry is for a different page, reload and try again. */
-    if (!tlb_hit(tlb_addr, addr)) {
-        if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
-                            addr & TARGET_PAGE_MASK)) {
-            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
-                     mmu_idx, retaddr);
-            index = tlb_index(env, mmu_idx, addr);
-            entry = tlb_entry(env, mmu_idx, addr);
-        }
-        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
-    }
-
-    /* Handle anything that isn't just a straight memory access. */
-    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        CPUTLBEntryFull *full;
-        bool need_swap;
-
-        /* For anything that is unaligned, recurse through byte stores. */
-        if ((addr & (size - 1)) != 0) {
-            goto do_unaligned_access;
-        }
-
-        full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-
-        /* Handle watchpoints. */
-        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
-            /* On watchpoint hit, this will longjmp out. */
-            cpu_check_watchpoint(env_cpu(env), addr, size,
-                                 full->attrs, BP_MEM_WRITE, retaddr);
-        }
-
-        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
-
-        /* Handle I/O access.  */
-        if (tlb_addr & TLB_MMIO) {
-            io_writex(env, full, mmu_idx, val, addr, retaddr,
-                      op ^ (need_swap * MO_BSWAP));
-            return;
-        }
-
-        /* Ignore writes to ROM. */
-        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
-            return;
-        }
-
-        /* Handle clean RAM pages. */
-        if (tlb_addr & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, size, full, retaddr);
-        }
-
-        haddr = (void *)((uintptr_t)addr + entry->addend);
-
-        /*
-         * Keep these two store_memop separate to ensure that the compiler
-         * is able to fold the entire function to a single instruction.
-         * There is a build-time assert inside to remind you of this.  ;-)
-         */
-        if (unlikely(need_swap)) {
-            store_memop(haddr, val, op ^ MO_BSWAP);
-        } else {
-            store_memop(haddr, val, op);
-        }
-        return;
-    }
-
-    /* Handle slow unaligned access (it spans two pages or IO). */
-    if (size > 1
-        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
-                    >= TARGET_PAGE_SIZE)) {
-    do_unaligned_access:
-        store_helper_unaligned(env, addr, val, retaddr, size,
-                               mmu_idx, memop_big_endian(op));
-        return;
-    }
-
-    haddr = (void *)((uintptr_t)addr + entry->addend);
-    store_memop(haddr, val, op);
 }
 
-static void __attribute__((noinline))
-full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-             MemOpIdx oi, uintptr_t retaddr)
+static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
+                    int mmu_idx, MemOp memop, uintptr_t ra)
 {
-    validate_memop(oi, MO_UB);
-    store_helper(env, addr, val, oi, retaddr, MO_UB);
+    if (unlikely(p->flags & TLB_MMIO)) {
+        io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+        /* nothing */
+    } else {
+        /* Swap to host endian if necessary, then store. */
+        if (memop & MO_BSWAP) {
+            val = bswap16(val);
+        }
+        store_memop(p->haddr, val, MO_UW);
+    }
+}
+
+static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
+                    int mmu_idx, MemOp memop, uintptr_t ra)
+{
+    if (unlikely(p->flags & TLB_MMIO)) {
+        io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+        /* nothing */
+    } else {
+        /* Swap to host endian if necessary, then store. */
+        if (memop & MO_BSWAP) {
+            val = bswap32(val);
+        }
+        store_memop(p->haddr, val, MO_UL);
+    }
+}
+
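(Aside: a standalone sketch of the little-endian byte-store loop used by
do_st_bytes_leN/do_st_leN above; the return value is the not-yet-stored
remainder, which the caller feeds to the second page.  Illustrative values,
not QEMU code:)

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t st_leN(uint8_t *p, int size, uint64_t val_le)
    {
        for (int i = 0; i < size; i++, val_le >>= 8) {
            p[i] = val_le;              /* low byte first */
        }
        return val_le;                  /* remainder for the next page */
    }

    int main(void)
    {
        uint8_t page0[3], page1[1];
        uint64_t rest = st_leN(page0, 3, 0x11223344); /* rest == 0x11 */
        st_leN(page1, 1, rest);
        printf("%02x %02x %02x | %02x\n",
               page0[0], page0[1], page0[2], page1[0]); /* 44 33 22 | 11 */
        return 0;
    }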
+static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
+                    int mmu_idx, MemOp memop, uintptr_t ra)
+{
+    if (unlikely(p->flags & TLB_MMIO)) {
+        io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+        /* nothing */
+    } else {
+        /* Swap to host endian if necessary, then store. */
+        if (memop & MO_BSWAP) {
+            val = bswap64(val);
+        }
+        store_memop(p->haddr, val, MO_UQ);
+    }
 }
 
 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
-                        MemOpIdx oi, uintptr_t retaddr)
+                        MemOpIdx oi, uintptr_t ra)
 {
-    full_stb_mmu(env, addr, val, oi, retaddr);
+    MMULookupLocals l;
+    bool crosspage;
+
+    validate_memop(oi, MO_UB);
+    crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+    tcg_debug_assert(!crosspage);
+
+    do_st_1(env, &l.page[0], val, l.mmu_idx, ra);
 }
 
-static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                            MemOpIdx oi, uintptr_t retaddr)
+static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+                       MemOpIdx oi, uintptr_t ra)
 {
-    validate_memop(oi, MO_LEUW);
-    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
+    MMULookupLocals l;
+    bool crosspage;
+    uint8_t a, b;
+
+    crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+    if (likely(!crosspage)) {
+        do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
+        return;
+    }
+
+    if ((l.memop & MO_BSWAP) == MO_LE) {
+        a = val, b = val >> 8;
+    } else {
+        b = val, a = val >> 8;
+    }
+    do_st_1(env, &l.page[0], a, l.mmu_idx, ra);
+    do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
 }
 
 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                        MemOpIdx oi, uintptr_t retaddr)
 {
-    full_le_stw_mmu(env, addr, val, oi, retaddr);
-}
-
-static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                            MemOpIdx oi, uintptr_t retaddr)
-{
-    validate_memop(oi, MO_BEUW);
-    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
+    validate_memop(oi, MO_LEUW);
+    do_st2_mmu(env, addr, val, oi, retaddr);
 }
 
 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                        MemOpIdx oi, uintptr_t retaddr)
 {
-    full_be_stw_mmu(env, addr, val, oi, retaddr);
+    validate_memop(oi, MO_BEUW);
+    do_st2_mmu(env, addr, val, oi, retaddr);
 }
 
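(Aside: the 4- and 8-byte crosspage paths below normalize a big-endian value
to little-endian once, so the same LE byte loop yields big-endian memory
order.  A standalone check with a hand-rolled byte swap -- illustrative, not
QEMU code:)

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t my_bswap32(uint32_t x)
    {
        return (x >> 24) | ((x >> 8) & 0xff00)
             | ((x << 8) & 0xff0000) | (x << 24);
    }

    int main(void)
    {
        uint32_t val = 0x11223344;      /* to be stored big-endian */
        uint8_t mem[4];

        val = my_bswap32(val);          /* normalize to LE once... */
        for (int i = 0; i < 4; i++, val >>= 8) {
            mem[i] = val;               /* ...generic LE byte loop */
        }
        printf("%02x %02x %02x %02x\n",
               mem[0], mem[1], mem[2], mem[3]);  /* 11 22 33 44 */
        return 0;
    }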
-static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                            MemOpIdx oi, uintptr_t retaddr)
+static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+                       MemOpIdx oi, uintptr_t ra)
 {
-    validate_memop(oi, MO_LEUL);
-    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
+    MMULookupLocals l;
+    bool crosspage;
+
+    crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+    if (likely(!crosspage)) {
+        do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
+        return;
+    }
+
+    /* Swap to little endian for simplicity, then store by bytes. */
+    if ((l.memop & MO_BSWAP) != MO_LE) {
+        val = bswap32(val);
+    }
+    val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
+    (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
 }
 
 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                        MemOpIdx oi, uintptr_t retaddr)
 {
-    full_le_stl_mmu(env, addr, val, oi, retaddr);
-}
-
-static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                            MemOpIdx oi, uintptr_t retaddr)
-{
-    validate_memop(oi, MO_BEUL);
-    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
+    validate_memop(oi, MO_LEUL);
+    do_st4_mmu(env, addr, val, oi, retaddr);
 }
 
 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                        MemOpIdx oi, uintptr_t retaddr)
 {
-    full_be_stl_mmu(env, addr, val, oi, retaddr);
+    validate_memop(oi, MO_BEUL);
+    do_st4_mmu(env, addr, val, oi, retaddr);
+}
+
+static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+                       MemOpIdx oi, uintptr_t ra)
+{
+    MMULookupLocals l;
+    bool crosspage;
+
+    crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+    if (likely(!crosspage)) {
+        do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
+        return;
+    }
+
+    /* Swap to little endian for simplicity, then store by bytes. */
+    if ((l.memop & MO_BSWAP) != MO_LE) {
+        val = bswap64(val);
+    }
+    val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
+    (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
 }
 
 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                        MemOpIdx oi, uintptr_t retaddr)
 {
     validate_memop(oi, MO_LEUQ);
-    store_helper(env, addr, val, oi, retaddr, MO_LEUQ);
+    do_st8_mmu(env, addr, val, oi, retaddr);
 }
 
 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                        MemOpIdx oi, uintptr_t retaddr)
 {
     validate_memop(oi, MO_BEUQ);
-    store_helper(env, addr, val, oi, retaddr, MO_BEUQ);
+    do_st8_mmu(env, addr, val, oi, retaddr);
 }
 
 /*
  * Store Helpers for cpu_ldst.h
  */
 
-typedef void FullStoreHelper(CPUArchState *env, target_ulong addr,
-                             uint64_t val, MemOpIdx oi, uintptr_t retaddr);
-
-static inline void cpu_store_helper(CPUArchState *env, target_ulong addr,
-                                    uint64_t val, MemOpIdx oi, uintptr_t ra,
-                                    FullStoreHelper *full_store)
+static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
 {
-    full_store(env, addr, val, oi, ra);
     qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
 }
 
 void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                  MemOpIdx oi, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu);
+    helper_ret_stb_mmu(env, addr, val, oi, retaddr);
+    plugin_store_cb(env, addr, oi);
 }
 
 void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                     MemOpIdx oi, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu);
+    helper_be_stw_mmu(env, addr, val, oi, retaddr);
+    plugin_store_cb(env, addr, oi);
 }
 
 void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                     MemOpIdx oi, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu);
+    helper_be_stl_mmu(env, addr, val, oi, retaddr);
+    plugin_store_cb(env, addr, oi);
 }
 
 void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                     MemOpIdx oi, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu);
+    helper_be_stq_mmu(env, addr, val, oi, retaddr);
+    plugin_store_cb(env, addr, oi);
 }
 
 void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                     MemOpIdx oi, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu);
+    helper_le_stw_mmu(env, addr, val, oi, retaddr);
+    plugin_store_cb(env, addr, oi);
 }
 
 void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                     MemOpIdx oi, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu);
+    helper_le_stl_mmu(env, addr, val, oi, retaddr);
+    plugin_store_cb(env, addr, oi);
 }
 
 void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                     MemOpIdx oi, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
+    helper_le_stq_mmu(env, addr, val, oi, retaddr);
+    plugin_store_cb(env, addr, oi);
 }
 
 void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,

From 335dfd253fc242b009a1b9b5d4fffbf4ea52928d Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Wed, 8 Mar 2023 12:48:13 -0800
Subject: [PATCH 53/53] target/loongarch: Do not include tcg-ldst.h

This header is supposed to be private to tcg and in fact
does not need to be included here at all.

Reviewed-by: Song Gao
Reviewed-by: Peter Maydell
Signed-off-by: Richard Henderson
---
 target/loongarch/csr_helper.c   | 1 -
 target/loongarch/iocsr_helper.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/target/loongarch/csr_helper.c b/target/loongarch/csr_helper.c
index 7e02787895..6526367946 100644
--- a/target/loongarch/csr_helper.c
+++ b/target/loongarch/csr_helper.c
@@ -15,7 +15,6 @@
 #include "exec/cpu_ldst.h"
 #include "hw/irq.h"
 #include "cpu-csr.h"
-#include "tcg/tcg-ldst.h"
 
 target_ulong helper_csrrd_pgd(CPULoongArchState *env)
 {
diff --git a/target/loongarch/iocsr_helper.c b/target/loongarch/iocsr_helper.c
index 505853e17b..dda9845d6c 100644
--- a/target/loongarch/iocsr_helper.c
+++ b/target/loongarch/iocsr_helper.c
@@ -12,7 +12,6 @@
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
-#include "tcg/tcg-ldst.h"
 
 #define GET_MEMTXATTRS(cas) \
     ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})