From 86e4f93d827d3c1efd00cd8a906e38a2c0f2b5bc Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Fri, 25 Aug 2023 14:06:58 -0700
Subject: [PATCH 1/5] softmmu: Assert data in bounds in iotlb_to_section
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Acked-by: Alex Bennée
Suggested-by: Alex Bennée
Signed-off-by: Richard Henderson
---
 softmmu/physmem.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 3df73542e1..7597dc1c39 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -2413,9 +2413,15 @@ MemoryRegionSection *iotlb_to_section(CPUState *cpu,
     int asidx = cpu_asidx_from_attrs(cpu, attrs);
     CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
     AddressSpaceDispatch *d = qatomic_rcu_read(&cpuas->memory_dispatch);
-    MemoryRegionSection *sections = d->map.sections;
+    int section_index = index & ~TARGET_PAGE_MASK;
+    MemoryRegionSection *ret;
 
-    return &sections[index & ~TARGET_PAGE_MASK];
+    assert(section_index < d->map.sections_nb);
+    ret = d->map.sections + section_index;
+    assert(ret->mr);
+    assert(ret->mr->ops);
+
+    return ret;
 }
 
 static void io_mem_init(void)

From 0d58c660689f6da1e3feff8a997014003d928b3b Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Fri, 25 Aug 2023 16:13:17 -0700
Subject: [PATCH 2/5] softmmu: Use async_run_on_cpu in tcg_commit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After system startup, run the update to memory_dispatch and the
tlb_flush on the cpu.  This eliminates a race, wherein a running cpu
sees the memory_dispatch change but has not yet seen the tlb_flush.

Since the update now happens on the cpu, we need not use
qatomic_rcu_read to protect the read of memory_dispatch.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1826
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1834
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1846
Tested-by: Alex Bennée
Reviewed-by: Alex Bennée
Signed-off-by: Richard Henderson
---
 accel/tcg/cpu-exec-common.c | 30 ----------------------------
 include/exec/cpu-common.h   |  1 -
 softmmu/physmem.c           | 40 +++++++++++++++++++++++++++----------
 3 files changed, 29 insertions(+), 42 deletions(-)

diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
index 9a5fabf625..7e35d7f4b5 100644
--- a/accel/tcg/cpu-exec-common.c
+++ b/accel/tcg/cpu-exec-common.c
@@ -33,36 +33,6 @@ void cpu_loop_exit_noexc(CPUState *cpu)
     cpu_loop_exit(cpu);
 }
 
-#if defined(CONFIG_SOFTMMU)
-void cpu_reloading_memory_map(void)
-{
-    if (qemu_in_vcpu_thread() && current_cpu->running) {
-        /* The guest can in theory prolong the RCU critical section as long
-         * as it feels like. The major problem with this is that because it
-         * can do multiple reconfigurations of the memory map within the
-         * critical section, we could potentially accumulate an unbounded
-         * collection of memory data structures awaiting reclamation.
-         *
-         * Because the only thing we're currently protecting with RCU is the
-         * memory data structures, it's sufficient to break the critical section
-         * in this callback, which we know will get called every time the
-         * memory map is rearranged.
-         *
-         * (If we add anything else in the system that uses RCU to protect
-         * its data structures, we will need to implement some other mechanism
-         * to force TCG CPUs to exit the critical section, at which point this
-         * part of this callback might become unnecessary.)
-         *
-         * This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which
-         * only protects cpu->as->dispatch. Since we know our caller is about
-         * to reload it, it's safe to split the critical section.
-         */
-        rcu_read_unlock();
-        rcu_read_lock();
-    }
-}
-#endif
-
 void cpu_loop_exit(CPUState *cpu)
 {
     /* Undo the setting in cpu_tb_exec. */
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 87dc9a752c..41788c0bdd 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -133,7 +133,6 @@ static inline void cpu_physical_memory_write(hwaddr addr,
 {
     cpu_physical_memory_rw(addr, (void *)buf, len, true);
 }
-void cpu_reloading_memory_map(void);
 void *cpu_physical_memory_map(hwaddr addr, hwaddr *plen,
                               bool is_write);
 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 7597dc1c39..18277ddd67 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -680,8 +680,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
     IOMMUTLBEntry iotlb;
     int iommu_idx;
     hwaddr addr = orig_addr;
-    AddressSpaceDispatch *d =
-        qatomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
+    AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
 
     for (;;) {
         section = address_space_translate_internal(d, addr, &addr, plen, false);
@@ -2412,7 +2411,7 @@ MemoryRegionSection *iotlb_to_section(CPUState *cpu,
 {
     int asidx = cpu_asidx_from_attrs(cpu, attrs);
     CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
-    AddressSpaceDispatch *d = qatomic_rcu_read(&cpuas->memory_dispatch);
+    AddressSpaceDispatch *d = cpuas->memory_dispatch;
     int section_index = index & ~TARGET_PAGE_MASK;
     MemoryRegionSection *ret;
 
@@ -2487,23 +2486,42 @@ static void tcg_log_global_after_sync(MemoryListener *listener)
     }
 }
 
+static void tcg_commit_cpu(CPUState *cpu, run_on_cpu_data data)
+{
+    CPUAddressSpace *cpuas = data.host_ptr;
+
+    cpuas->memory_dispatch = address_space_to_dispatch(cpuas->as);
+    tlb_flush(cpu);
+}
+
 static void tcg_commit(MemoryListener *listener)
 {
     CPUAddressSpace *cpuas;
-    AddressSpaceDispatch *d;
+    CPUState *cpu;
 
     assert(tcg_enabled());
     /* since each CPU stores ram addresses in its TLB cache, we must
        reset the modified entries */
     cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
-    cpu_reloading_memory_map();
-    /* The CPU and TLB are protected by the iothread lock.
-     * We reload the dispatch pointer now because cpu_reloading_memory_map()
-     * may have split the RCU critical section.
+    cpu = cpuas->cpu;
+
+    /*
+     * Defer changes to as->memory_dispatch until the cpu is quiescent.
+     * Otherwise we race between (1) other cpu threads and (2) ongoing
+     * i/o for the current cpu thread, with data cached by mmu_lookup().
+     *
+     * In addition, queueing the work function will kick the cpu back to
+     * the main loop, which will end the RCU critical section and reclaim
+     * the memory data structures.
+     *
+     * That said, the listener is also called during realize, before
+     * all of the tcg machinery for run-on is initialized: thus halt_cond.
      */
-    d = address_space_to_dispatch(cpuas->as);
-    qatomic_rcu_set(&cpuas->memory_dispatch, d);
-    tlb_flush(cpuas->cpu);
+    if (cpu->halt_cond) {
+        async_run_on_cpu(cpu, tcg_commit_cpu, RUN_ON_CPU_HOST_PTR(cpuas));
+    } else {
+        tcg_commit_cpu(cpu, RUN_ON_CPU_HOST_PTR(cpuas));
+    }
 }
 
 static void memory_map_init(void)

From 77fafcb50046f6c683f3cab9df975fdab43e1a68 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 28 Aug 2023 12:15:35 -0700
Subject: [PATCH 3/5] tcg: Remove vecop_list check from tcg_gen_not_vec
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The not pattern is always available via generic expansion.
See debug block in tcg_can_emit_vecop_list.

Fixes: 11978f6f58 ("tcg: Fix expansion of INDEX_op_not_vec")
Reviewed-by: Philippe Mathieu-Daudé
Signed-off-by: Richard Henderson
---
 tcg/tcg-op-vec.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index ad8ee08a7e..094298bb27 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -391,12 +391,11 @@ static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
 
 void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 {
-    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
-
-    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
+    if (TCG_TARGET_HAS_not_vec) {
+        vec_gen_op2(INDEX_op_not_vec, 0, r, a);
+    } else {
         tcg_gen_xor_vec(0, r, a, tcg_constant_vec_matching(r, 0, -1));
     }
-    tcg_swap_vecop_list(hold_list);
 }
 
 void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)

From dad2f2f5afbaf58d6056f31dfd4b9edd0854b8ab Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 22 Aug 2023 11:02:00 -0700
Subject: [PATCH 4/5] tcg/sparc64: Disable TCG_TARGET_HAS_extr_i64_i32

Since a59a29312660 ("tcg/sparc64: Remove sparc32plus constraints")
we no longer distinguish registers with 32 vs 64 bits.  Therefore
we can remove support for the backend-specific type change opcodes.

Signed-off-by: Richard Henderson
---
 tcg/sparc64/tcg-target.c.inc | 11 -----------
 tcg/sparc64/tcg-target.h     |  2 +-
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index f2a346a1bd..81a08bb6c5 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -529,11 +529,6 @@ static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
     tcg_out_ext32u(s, rd, rs);
 }
 
-static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rs)
-{
-    tcg_out_mov(s, TCG_TYPE_I32, rd, rs);
-}
-
 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
 {
     return false;
@@ -1444,9 +1439,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_divu_i64:
         c = ARITH_UDIVX;
         goto gen_arith;
-    case INDEX_op_extrh_i64_i32:
-        tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX);
-        break;
     case INDEX_op_brcond_i64:
         tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3]));
         break;
@@ -1501,7 +1493,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext32u_i64:
     case INDEX_op_ext_i32_i64:
     case INDEX_op_extu_i32_i64:
-    case INDEX_op_extrl_i64_i32:
     default:
         g_assert_not_reached();
     }
@@ -1533,8 +1524,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_ext32u_i64:
     case INDEX_op_ext_i32_i64:
     case INDEX_op_extu_i32_i64:
-    case INDEX_op_extrl_i64_i32:
-    case INDEX_op_extrh_i64_i32:
     case INDEX_op_qemu_ld_a32_i32:
     case INDEX_op_qemu_ld_a64_i32:
     case INDEX_op_qemu_ld_a32_i64:
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index 3d41c9659b..5cfc4b4679 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -115,7 +115,7 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_mulsh_i32        0
 #define TCG_TARGET_HAS_qemu_st8_i32     0
 
-#define TCG_TARGET_HAS_extr_i64_i32     1
+#define TCG_TARGET_HAS_extr_i64_i32     0
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rem_i64          0
 #define TCG_TARGET_HAS_rot_i64          0

From 669fd6151337fdc81e34f7eb4940ba2f20d89957 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 29 Aug 2023 11:29:09 -0700
Subject: [PATCH 5/5] Revert "include/exec: typedef abi_ptr to vaddr in softmmu"

This reverts commit fc15bfb6a6bda8d4d01f1383579d385acae17c0f.

This patch caused a regression for tricore-softmmu,
./tests/tcg/tricore-softmmu/test_boot_to_main.c.tst.

Reported-by: Bastian Koppelmann
Signed-off-by: Richard Henderson
---
 include/exec/cpu_ldst.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index f3ce4eb1d0..da10ba1433 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -121,8 +121,8 @@ static inline bool guest_range_valid_untagged(abi_ulong start, abi_ulong len)
         h2g_nocheck(x); \
     })
 #else
-typedef vaddr abi_ptr;
-#define TARGET_ABI_FMT_ptr "%016" VADDR_PRIx
+typedef target_ulong abi_ptr;
+#define TARGET_ABI_FMT_ptr TARGET_FMT_lx
 #endif
 
 uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr);