From 8afc43ea631ffe3de889b84028c3ce430047301d Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 14 Mar 2023 11:18:13 +0100 Subject: [PATCH 01/13] s390x/gdb: Split s390-virt.xml Both TCG and KVM emulate ckc, cputm, last_break and prefix, and it's quite useful to have them during debugging. Right now they are grouped together with KVM-only pp, pfault_token, pfault_select and pfault_compare in s390-virt.xml, and are not available when debugging TCG-emulated code. Move KVM-only registers into the new s390-virt-kvm.xml file. Advertise s390-virt.xml always, and the new s390-virt-kvm.xml only for KVM. Signed-off-by: Ilya Leoshkevich Message-Id: <20230314101813.174874-1-iii@linux.ibm.com> Acked-by: David Hildenbrand Signed-off-by: Thomas Huth --- configs/targets/s390x-linux-user.mak | 2 +- configs/targets/s390x-softmmu.mak | 2 +- gdb-xml/s390-virt-kvm.xml | 14 ++++++ gdb-xml/s390-virt.xml | 4 -- target/s390x/gdbstub.c | 65 +++++++++++++++++++--------- 5 files changed, 61 insertions(+), 26 deletions(-) create mode 100644 gdb-xml/s390-virt-kvm.xml diff --git a/configs/targets/s390x-linux-user.mak b/configs/targets/s390x-linux-user.mak index e2978248ed..24c04c8589 100644 --- a/configs/targets/s390x-linux-user.mak +++ b/configs/targets/s390x-linux-user.mak @@ -2,4 +2,4 @@ TARGET_ARCH=s390x TARGET_SYSTBL_ABI=common,64 TARGET_SYSTBL=syscall.tbl TARGET_BIG_ENDIAN=y -TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-gs.xml +TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-virt-kvm.xml gdb-xml/s390-gs.xml diff --git a/configs/targets/s390x-softmmu.mak b/configs/targets/s390x-softmmu.mak index 258b4cf358..70d2f9f0ba 100644 --- a/configs/targets/s390x-softmmu.mak +++ b/configs/targets/s390x-softmmu.mak @@ -1,4 +1,4 @@ TARGET_ARCH=s390x TARGET_BIG_ENDIAN=y TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-gs.xml +TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-virt-kvm.xml gdb-xml/s390-gs.xml diff --git a/gdb-xml/s390-virt-kvm.xml b/gdb-xml/s390-virt-kvm.xml new file mode 100644 index 0000000000..a256eddaf5 --- /dev/null +++ b/gdb-xml/s390-virt-kvm.xml @@ -0,0 +1,14 @@ + + + + + + + + + + diff --git a/gdb-xml/s390-virt.xml b/gdb-xml/s390-virt.xml index e2e9a7ad3c..438eb68aab 100644 --- a/gdb-xml/s390-virt.xml +++ b/gdb-xml/s390-virt.xml @@ -11,8 +11,4 @@ - - - - diff --git a/target/s390x/gdbstub.c b/target/s390x/gdbstub.c index 0cb69395b4..6fbfd41bc8 100644 --- a/target/s390x/gdbstub.c +++ b/target/s390x/gdbstub.c @@ -206,12 +206,8 @@ static int cpu_write_c_reg(CPUS390XState *env, uint8_t *mem_buf, int n) #define S390_VIRT_CPUTM_REGNUM 1 #define S390_VIRT_BEA_REGNUM 2 #define S390_VIRT_PREFIX_REGNUM 3 -#define S390_VIRT_PP_REGNUM 4 -#define S390_VIRT_PFT_REGNUM 5 -#define S390_VIRT_PFS_REGNUM 6 -#define S390_VIRT_PFC_REGNUM 7 /* total number of registers in s390-virt.xml */ -#define S390_NUM_VIRT_REGS 8 +#define S390_NUM_VIRT_REGS 4 static int cpu_read_virt_reg(CPUS390XState *env, GByteArray *mem_buf, int n) { @@ -224,14 +220,6 @@ static int cpu_read_virt_reg(CPUS390XState *env, GByteArray *mem_buf, int n) return gdb_get_regl(mem_buf, env->gbea); case S390_VIRT_PREFIX_REGNUM: return gdb_get_regl(mem_buf, env->psa); - case S390_VIRT_PP_REGNUM: - return gdb_get_regl(mem_buf, env->pp); - case S390_VIRT_PFT_REGNUM: - return gdb_get_regl(mem_buf, env->pfault_token); - case S390_VIRT_PFS_REGNUM: - return gdb_get_regl(mem_buf, env->pfault_select); - case S390_VIRT_PFC_REGNUM: - return gdb_get_regl(mem_buf, env->pfault_compare); default: return 0; } @@ -256,19 +244,51 @@ static int cpu_write_virt_reg(CPUS390XState *env, uint8_t *mem_buf, int n) env->psa = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PP_REGNUM: + default: + return 0; + } +} + +/* the values represent the positions in s390-virt-kvm.xml */ +#define S390_VIRT_KVM_PP_REGNUM 0 +#define S390_VIRT_KVM_PFT_REGNUM 1 +#define S390_VIRT_KVM_PFS_REGNUM 2 +#define S390_VIRT_KVM_PFC_REGNUM 3 +/* total number of registers in s390-virt-kvm.xml */ +#define S390_NUM_VIRT_KVM_REGS 4 + +static int cpu_read_virt_kvm_reg(CPUS390XState *env, GByteArray *mem_buf, int n) +{ + switch (n) { + case S390_VIRT_KVM_PP_REGNUM: + return gdb_get_regl(mem_buf, env->pp); + case S390_VIRT_KVM_PFT_REGNUM: + return gdb_get_regl(mem_buf, env->pfault_token); + case S390_VIRT_KVM_PFS_REGNUM: + return gdb_get_regl(mem_buf, env->pfault_select); + case S390_VIRT_KVM_PFC_REGNUM: + return gdb_get_regl(mem_buf, env->pfault_compare); + default: + return 0; + } +} + +static int cpu_write_virt_kvm_reg(CPUS390XState *env, uint8_t *mem_buf, int n) +{ + switch (n) { + case S390_VIRT_KVM_PP_REGNUM: env->pp = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PFT_REGNUM: + case S390_VIRT_KVM_PFT_REGNUM: env->pfault_token = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PFS_REGNUM: + case S390_VIRT_KVM_PFS_REGNUM: env->pfault_select = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PFC_REGNUM: + case S390_VIRT_KVM_PFC_REGNUM: env->pfault_compare = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; @@ -321,10 +341,15 @@ void s390_cpu_gdb_init(CPUState *cs) cpu_write_c_reg, S390_NUM_C_REGS, "s390-cr.xml", 0); + gdb_register_coprocessor(cs, cpu_read_virt_reg, + cpu_write_virt_reg, + S390_NUM_VIRT_REGS, "s390-virt.xml", 0); + if (kvm_enabled()) { - gdb_register_coprocessor(cs, cpu_read_virt_reg, - cpu_write_virt_reg, - S390_NUM_VIRT_REGS, "s390-virt.xml", 0); + gdb_register_coprocessor(cs, cpu_read_virt_kvm_reg, + cpu_write_virt_kvm_reg, + S390_NUM_VIRT_KVM_REGS, "s390-virt-kvm.xml", + 0); } #endif } From c4c289cc5f6109a8c131943a7554679ffb42c03f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 19 Apr 2023 12:30:18 +0200 Subject: [PATCH 02/13] hw/rdma: Remove unused macros PG_DIR_SZ and PG_TBL_SZ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They have apparently never been used. Message-Id: <20230419103018.627115-1-thuth@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Huth --- hw/rdma/rdma_rm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index cfd85de3e6..038d564433 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -23,10 +23,6 @@ #include "rdma_backend.h" #include "rdma_rm.h" -/* Page directory and page tables */ -#define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } -#define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } - void rdma_format_device_counters(RdmaDeviceResources *dev_res, GString *buf) { g_string_append_printf(buf, "\ttx : %" PRId64 "\n", From c02578b1fa0345d3da7dcdacf30ea2c246336257 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 19 Apr 2023 13:49:37 +0200 Subject: [PATCH 03/13] hw/rdma: Compile target-independent parts of the rdma code only once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some files of the rdma code do not depend on any target specific macros. Compile these only once to save some time during the build. Message-Id: <20230419114937.667221-1-thuth@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Huth --- hw/rdma/meson.build | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/rdma/meson.build b/hw/rdma/meson.build index 7325f40c32..fc7917192f 100644 --- a/hw/rdma/meson.build +++ b/hw/rdma/meson.build @@ -1,10 +1,12 @@ -specific_ss.add(when: 'CONFIG_VMW_PVRDMA', if_true: files( +softmmu_ss.add(when: 'CONFIG_VMW_PVRDMA', if_true: files( 'rdma.c', 'rdma_backend.c', - 'rdma_rm.c', 'rdma_utils.c', + 'vmw/pvrdma_qp_ops.c', +)) +specific_ss.add(when: 'CONFIG_VMW_PVRDMA', if_true: files( + 'rdma_rm.c', 'vmw/pvrdma_cmd.c', 'vmw/pvrdma_dev_ring.c', 'vmw/pvrdma_main.c', - 'vmw/pvrdma_qp_ops.c', )) From e017462db554bedd82fa3bb86cd72d9e38684dd4 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 19 Apr 2023 13:13:37 +0200 Subject: [PATCH 04/13] hw/rdma: VMW_PVRDMA should depend on VMXNET3_PCI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "pvrdma" device is only usable in conjunction with the "vmxnet3" NIC - see the check for TYPE_VMXNET3 in pvrdma_realize(). By adding this dependency, the amount of total files that have to be compiled for a configuration with all targets decreases by 64 files (!), since the rdma code is marked as target specific and thus got recompiled for all targets that enable PCI so far. Message-Id: <20230419111337.651673-1-thuth@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Huth --- hw/rdma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rdma/Kconfig b/hw/rdma/Kconfig index 8e2211288f..840320bdc0 100644 --- a/hw/rdma/Kconfig +++ b/hw/rdma/Kconfig @@ -1,3 +1,3 @@ config VMW_PVRDMA default y if PCI_DEVICES - depends on PVRDMA && PCI && MSI_NONBROKEN + depends on PVRDMA && MSI_NONBROKEN && VMXNET3_PCI From 3ee7f21ed292966f5cd3eb71aa06f8ffc0e5ae41 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Apr 2023 14:54:23 +0200 Subject: [PATCH 05/13] tests: vhost-user-test: release mutex on protocol violation chr_read() is printing an error message and returning with s->data_mutex taken. This can potentially cause a hang. Reported by Coverity. Signed-off-by: Paolo Bonzini Message-Id: <20230427125423.103536-1-pbonzini@redhat.com> Reviewed-by: Thomas Huth Signed-off-by: Thomas Huth --- tests/qtest/vhost-user-test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index bf9f7c4248..e4f95b2858 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -351,7 +351,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) if (size != msg.size) { qos_printf("%s: Wrong message size received %d != %d\n", __func__, size, msg.size); - return; + goto out; } } @@ -509,6 +509,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) break; } +out: g_mutex_unlock(&s->data_mutex); } From a2e1753b8054344f32cf94f31c6399a58794a380 Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Thu, 27 Apr 2023 17:10:06 -0400 Subject: [PATCH 06/13] memory: prevent dma-reentracy issues Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. This flag is set/checked prior to calling a device's MemoryRegion handlers, and set when device code initiates DMA. The purpose of this flag is to prevent two types of DMA-based reentrancy issues: 1.) mmio -> dma -> mmio case 2.) bh -> dma write -> mmio case These issues have led to problems such as stack-exhaustion and use-after-frees. Summary of the problem from Peter Maydell: https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 Resolves: CVE-2023-0330 Signed-off-by: Alexander Bulekov Reviewed-by: Thomas Huth Message-Id: <20230427211013.2994127-2-alxndr@bu.edu> [thuth: Replace warn_report() with warn_report_once()] Signed-off-by: Thomas Huth --- include/exec/memory.h | 5 +++++ include/hw/qdev-core.h | 7 +++++++ softmmu/memory.c | 16 ++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/include/exec/memory.h b/include/exec/memory.h index 15ade918ba..e45ce6061f 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -767,6 +767,8 @@ struct MemoryRegion { bool is_iommu; RAMBlock *ram_block; Object *owner; + /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */ + DeviceState *dev; const MemoryRegionOps *ops; void *opaque; @@ -791,6 +793,9 @@ struct MemoryRegion { unsigned ioeventfd_nb; MemoryRegionIoeventfd *ioeventfds; RamDiscardManager *rdm; /* Only for RAM */ + + /* For devices designed to perform re-entrant IO into their own IO MRs */ + bool disable_reentrancy_guard; }; struct IOMMUMemoryRegion { diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index bd50ad5ee1..7623703943 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -162,6 +162,10 @@ struct NamedClockList { QLIST_ENTRY(NamedClockList) node; }; +typedef struct { + bool engaged_in_io; +} MemReentrancyGuard; + /** * DeviceState: * @realized: Indicates whether the device has been fully constructed. @@ -194,6 +198,9 @@ struct DeviceState { int alias_required_for_version; ResettableState reset; GSList *unplug_blockers; + + /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ + MemReentrancyGuard mem_reentrancy_guard; }; struct DeviceListener { diff --git a/softmmu/memory.c b/softmmu/memory.c index b1a6cae6f5..b7b3386e9d 100644 --- a/softmmu/memory.c +++ b/softmmu/memory.c @@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, access_size_max = 4; } + /* Do not allow more than one simultaneous access to a device's IO Regions */ + if (mr->dev && !mr->disable_reentrancy_guard && + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { + warn_report_once("Blocked re-entrant IO on MemoryRegion: " + "%s at addr: 0x%" HWADDR_PRIX, + memory_region_name(mr), addr); + return MEMTX_ACCESS_ERROR; + } + mr->dev->mem_reentrancy_guard.engaged_in_io = true; + } + /* FIXME: support unaligned access? */ access_size = MAX(MIN(size, access_size_max), access_size_min); access_mask = MAKE_64BIT_MASK(0, access_size * 8); @@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, access_mask, attrs); } } + if (mr->dev) { + mr->dev->mem_reentrancy_guard.engaged_in_io = false; + } return r; } @@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, } mr->name = g_strdup(name); mr->owner = owner; + mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); mr->ram_block = NULL; if (name) { From 9c86c97f12c060bf7484dd931f38634e166a81f0 Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Thu, 27 Apr 2023 17:10:07 -0400 Subject: [PATCH 07/13] async: Add an optional reentrancy guard to the BH API Devices can pass their MemoryReentrancyGuard (from their DeviceState), when creating new BHes. Then, the async API will toggle the guard before/after calling the BH call-back. This prevents bh->mmio reentrancy issues. Signed-off-by: Alexander Bulekov Reviewed-by: Darren Kenny Message-Id: <20230427211013.2994127-3-alxndr@bu.edu> [thuth: Fix "line over 90 characters" checkpatch.pl error] Signed-off-by: Thomas Huth --- docs/devel/multiple-iothreads.txt | 7 +++++++ include/block/aio.h | 18 ++++++++++++++++-- include/qemu/main-loop.h | 7 +++++-- tests/unit/ptimer-test-stubs.c | 3 ++- util/async.c | 18 +++++++++++++++++- util/main-loop.c | 6 ++++-- util/trace-events | 1 + 7 files changed, 52 insertions(+), 8 deletions(-) diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt index 343120f2ef..a3e949f6b3 100644 --- a/docs/devel/multiple-iothreads.txt +++ b/docs/devel/multiple-iothreads.txt @@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext: * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier * LEGACY timer_new_ms() - create a timer * LEGACY qemu_bh_new() - create a BH + * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard * LEGACY qemu_aio_wait() - run an event loop iteration Since they implicitly work on the main loop they cannot be used in code that @@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h): * aio_set_event_notifier() - monitor an event notifier * aio_timer_new() - create a timer * aio_bh_new() - create a BH + * aio_bh_new_guarded() - create a BH with a device re-entrancy guard * aio_poll() - run an event loop iteration +The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard" +argument, which is used to check for and prevent re-entrancy problems. For +BHs associated with devices, the reentrancy-guard is contained in the +corresponding DeviceState and named "mem_reentrancy_guard". + The AioContext can be obtained from the IOThread using iothread_get_aio_context() or for the main loop using qemu_get_aio_context(). Code that takes an AioContext argument works both in IOThreads or the main diff --git a/include/block/aio.h b/include/block/aio.h index e267d918fd..89bbc536f9 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -23,6 +23,8 @@ #include "qemu/thread.h" #include "qemu/timer.h" #include "block/graph-lock.h" +#include "hw/qdev-core.h" + typedef struct BlockAIOCB BlockAIOCB; typedef void BlockCompletionFunc(void *opaque, int ret); @@ -323,9 +325,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, * is opaque and must be allocated prior to its use. * * @name: A human-readable identifier for debugging purposes. + * @reentrancy_guard: A guard set when entering a cb to prevent + * device-reentrancy issues */ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - const char *name); + const char *name, MemReentrancyGuard *reentrancy_guard); /** * aio_bh_new: Allocate a new bottom half structure @@ -334,7 +338,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, * string. */ #define aio_bh_new(ctx, cb, opaque) \ - aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL) + +/** + * aio_bh_new_guarded: Allocate a new bottom half structure with a + * reentrancy_guard + * + * A convenience wrapper for aio_bh_new_full() that uses the cb as the name + * string. + */ +#define aio_bh_new_guarded(ctx, cb, opaque, guard) \ + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard) /** * aio_notify: Force processing of pending events. diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index b3e54e00bc..68e70e61aa 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); /* internal interfaces */ +#define qemu_bh_new_guarded(cb, opaque, guard) \ + qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard) #define qemu_bh_new(cb, opaque) \ - qemu_bh_new_full((cb), (opaque), (stringify(cb))) -QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); + qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, + MemReentrancyGuard *reentrancy_guard); void qemu_bh_schedule_idle(QEMUBH *bh); enum { diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c index f2bfcede93..8c9407c560 100644 --- a/tests/unit/ptimer-test-stubs.c +++ b/tests/unit/ptimer-test-stubs.c @@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) return deadline; } -QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, + MemReentrancyGuard *reentrancy_guard) { QEMUBH *bh = g_new(QEMUBH, 1); diff --git a/util/async.c b/util/async.c index 21016a1ac7..a9b528c370 100644 --- a/util/async.c +++ b/util/async.c @@ -65,6 +65,7 @@ struct QEMUBH { void *opaque; QSLIST_ENTRY(QEMUBH) next; unsigned flags; + MemReentrancyGuard *reentrancy_guard; }; /* Called concurrently from any thread */ @@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, } QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - const char *name) + const char *name, MemReentrancyGuard *reentrancy_guard) { QEMUBH *bh; bh = g_new(QEMUBH, 1); @@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, .cb = cb, .opaque = opaque, .name = name, + .reentrancy_guard = reentrancy_guard, }; return bh; } void aio_bh_call(QEMUBH *bh) { + bool last_engaged_in_io = false; + + if (bh->reentrancy_guard) { + last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; + if (bh->reentrancy_guard->engaged_in_io) { + trace_reentrant_aio(bh->ctx, bh->name); + } + bh->reentrancy_guard->engaged_in_io = true; + } + bh->cb(bh->opaque); + + if (bh->reentrancy_guard) { + bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; + } } /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */ diff --git a/util/main-loop.c b/util/main-loop.c index e180c85145..7022f02ef8 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking) /* Functions to operate on the main QEMU AioContext. */ -QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, + MemReentrancyGuard *reentrancy_guard) { - return aio_bh_new_full(qemu_aio_context, cb, opaque, name); + return aio_bh_new_full(qemu_aio_context, cb, opaque, name, + reentrancy_guard); } /* diff --git a/util/trace-events b/util/trace-events index 16f78d8fe5..3f7e766683 100644 --- a/util/trace-events +++ b/util/trace-events @@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d" # async.c aio_co_schedule(void *ctx, void *co) "ctx %p co %p" aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" +reentrant_aio(void *ctx, const char *name) "ctx %p name %s" # thread-pool.c thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" From ef56ffbdd6b0605dc1e305611287b948c970e236 Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Thu, 27 Apr 2023 17:10:08 -0400 Subject: [PATCH 08/13] checkpatch: add qemu_bh_new/aio_bh_new checks Advise authors to use the _guarded versions of the APIs, instead. Signed-off-by: Alexander Bulekov Reviewed-by: Darren Kenny Message-Id: <20230427211013.2994127-4-alxndr@bu.edu> Signed-off-by: Thomas Huth --- scripts/checkpatch.pl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index d768171dcf..eeaec436eb 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2865,6 +2865,14 @@ sub process { if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) { ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr); } +# recommend qemu_bh_new_guarded instead of qemu_bh_new + if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) { + ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr); + } +# recommend aio_bh_new_guarded instead of aio_bh_new + if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) { + ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr); + } # check for module_init(), use category-specific init macros explicitly please if ($line =~ /^module_init\s*\(/) { ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr); From f63192b0544af5d3e4d5edfd85ab520fcf671377 Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Thu, 27 Apr 2023 17:10:09 -0400 Subject: [PATCH 09/13] hw: replace most qemu_bh_new calls with qemu_bh_new_guarded This protects devices from bh->mmio reentrancy issues. Thanks: Thomas Huth for diagnosing OS X test failure. Signed-off-by: Alexander Bulekov Reviewed-by: Darren Kenny Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Reviewed-by: Paul Durrant Reviewed-by: Thomas Huth Message-Id: <20230427211013.2994127-5-alxndr@bu.edu> Signed-off-by: Thomas Huth --- hw/9pfs/xen-9p-backend.c | 5 ++++- hw/block/dataplane/virtio-blk.c | 3 ++- hw/block/dataplane/xen-block.c | 5 +++-- hw/char/virtio-serial-bus.c | 3 ++- hw/display/qxl.c | 9 ++++++--- hw/display/virtio-gpu.c | 6 ++++-- hw/ide/ahci.c | 3 ++- hw/ide/ahci_internal.h | 1 + hw/ide/core.c | 4 +++- hw/misc/imx_rngc.c | 6 ++++-- hw/misc/macio/mac_dbdma.c | 2 +- hw/net/virtio-net.c | 3 ++- hw/nvme/ctrl.c | 6 ++++-- hw/scsi/mptsas.c | 3 ++- hw/scsi/scsi-bus.c | 3 ++- hw/scsi/vmw_pvscsi.c | 3 ++- hw/usb/dev-uas.c | 3 ++- hw/usb/hcd-dwc2.c | 3 ++- hw/usb/hcd-ehci.c | 3 ++- hw/usb/hcd-uhci.c | 2 +- hw/usb/host-libusb.c | 6 ++++-- hw/usb/redirect.c | 6 ++++-- hw/usb/xen-usb.c | 3 ++- hw/virtio/virtio-balloon.c | 5 +++-- hw/virtio/virtio-crypto.c | 3 ++- 25 files changed, 66 insertions(+), 33 deletions(-) diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 74f3a05f88..0e266c552b 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -61,6 +61,7 @@ typedef struct Xen9pfsDev { int num_rings; Xen9pfsRing *rings; + MemReentrancyGuard mem_reentrancy_guard; } Xen9pfsDev; static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev); @@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data + XEN_FLEX_RING_SIZE(ring_order); - xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]); + xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh, + &xen_9pdev->rings[i], + &xen_9pdev->mem_reentrancy_guard); xen_9pdev->rings[i].out_cons = 0; xen_9pdev->rings[i].out_size = 0; xen_9pdev->rings[i].inprogress = false; diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index b28d81737e..a6202997ee 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, } else { s->ctx = qemu_get_aio_context(); } - s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); + s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s, + &DEVICE(vdev)->mem_reentrancy_guard); s->batch_notify_vqs = bitmap_new(conf->num_queues); *dataplane = s; diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index 734da42ea7..d8bc39d359 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, } else { dataplane->ctx = qemu_get_aio_context(); } - dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, - dataplane); + dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh, + dataplane, + &DEVICE(xendev)->mem_reentrancy_guard); return dataplane; } diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index 7d4601cb5d..dd619f0731 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) return; } - port->bh = qemu_bh_new(flush_queued_data_bh, port); + port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, + &dev->mem_reentrancy_guard); port->elem = NULL; } diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 80ce1e9a93..f1c0eb7dfc 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp) qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); - qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl); + qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl, + &DEVICE(qxl)->mem_reentrancy_guard); qxl_reset_state(qxl); - qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl); - qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd); + qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl, + &DEVICE(qxl)->mem_reentrancy_guard); + qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd, + &DEVICE(qxl)->mem_reentrancy_guard); } static void qxl_realize_primary(PCIDevice *dev, Error **errp) diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 5e15c79b94..66ac9b6cc5 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) g->ctrl_vq = virtio_get_queue(vdev, 0); g->cursor_vq = virtio_get_queue(vdev, 1); - g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g); - g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g); + g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, + &qdev->mem_reentrancy_guard); + g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, + &qdev->mem_reentrancy_guard); QTAILQ_INIT(&g->reslist); QTAILQ_INIT(&g->cmdq); QTAILQ_INIT(&g->fenceq); diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 55902e1df7..4e76d6b191 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma) ahci_write_fis_d2h(ad); if (ad->port_regs.cmd_issue && !ad->check_bh) { - ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); + ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad, + &ad->mem_reentrancy_guard); qemu_bh_schedule(ad->check_bh); } } diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h index 303fcd7235..2480455372 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -321,6 +321,7 @@ struct AHCIDevice { bool init_d2h_sent; AHCICmdHdr *cur_cmd; NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; + MemReentrancyGuard mem_reentrancy_guard; }; struct AHCIPCIState { diff --git a/hw/ide/core.c b/hw/ide/core.c index 45d14a25e9..de48ff9f86 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim( BlockCompletionFunc *cb, void *cb_opaque, void *opaque) { IDEState *s = opaque; + IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; TrimAIOCB *iocb; /* Paired with a decrement in ide_trim_bh_cb() */ @@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim( iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); iocb->s = s; - iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); + iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, + &DEVICE(dev)->mem_reentrancy_guard); iocb->ret = 0; iocb->qiov = qiov; iocb->i = -1; diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c index 632c03779c..082c6980ad 100644 --- a/hw/misc/imx_rngc.c +++ b/hw/misc/imx_rngc.c @@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(sbd, &s->iomem); sysbus_init_irq(sbd, &s->irq); - s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s); - s->seed_bh = qemu_bh_new(imx_rngc_seed, s); + s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s, + &dev->mem_reentrancy_guard); + s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s, + &dev->mem_reentrancy_guard); } static void imx_rngc_reset(DeviceState *dev) diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index 43bb1f56ba..80a789f32b 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp) { DBDMAState *s = MAC_DBDMA(dev); - s->bh = qemu_bh_new(DBDMA_run_bh, s); + s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard); } static void mac_dbdma_class_init(ObjectClass *oc, void *data) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 53e1c32643..447f669921 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) n->vqs[index].tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size, virtio_net_handle_tx_bh); - n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); + n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], + &DEVICE(vdev)->mem_reentrancy_guard); } n->vqs[index].tx_waiting = 0; diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index f59dfe1cbe..fd917fcda1 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); } - sq->bh = qemu_bh_new(nvme_process_sq, sq); + sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq, + &DEVICE(sq->ctrl)->mem_reentrancy_guard); if (n->dbbuf_enabled) { sq->db_addr = n->dbbuf_dbs + (sqid << 3); @@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, } } n->cq[cqid] = cq; - cq->bh = qemu_bh_new(nvme_post_cqes, cq); + cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq, + &DEVICE(cq->ctrl)->mem_reentrancy_guard); } static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c index c485da792c..3de288b454 100644 --- a/hw/scsi/mptsas.c +++ b/hw/scsi/mptsas.c @@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) } s->max_devices = MPTSAS_NUM_PORTS; - s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); + s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s, + &DEVICE(dev)->mem_reentrancy_guard); scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); } diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index c97176110c..3c20b47ad0 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state) AioContext *ctx = blk_get_aio_context(s->conf.blk); /* The reference is dropped in scsi_dma_restart_bh.*/ object_ref(OBJECT(s)); - s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); + s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, + &DEVICE(s)->mem_reentrancy_guard); qemu_bh_schedule(s->bh); } } diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index fa76696855..4de34536e9 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET); } - s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); + s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s, + &DEVICE(pci_dev)->mem_reentrancy_guard); scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); /* override default SCSI bus hotplug-handler, with pvscsi's one */ diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c index 88f99c05d5..f013ded91e 100644 --- a/hw/usb/dev-uas.c +++ b/hw/usb/dev-uas.c @@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) QTAILQ_INIT(&uas->results); QTAILQ_INIT(&uas->requests); - uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); + uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas, + &d->mem_reentrancy_guard); dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c index 8755e9cbb0..a0c4e782b2 100644 --- a/hw/usb/hcd-dwc2.c +++ b/hw/usb/hcd-dwc2.c @@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp) s->fi = USB_FRMINTVL - 1; s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); - s->async_bh = qemu_bh_new(dwc2_work_bh, s); + s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s, + &dev->mem_reentrancy_guard); sysbus_init_irq(sbd, &s->irq); } diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index d4da8dcb8d..c930c60921 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) } s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); - s->async_bh = qemu_bh_new(ehci_work_bh, s); + s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s, + &dev->mem_reentrancy_guard); s->device = dev; s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c index 8ac1175ad2..77baaa7a6b 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL); } } - s->bh = qemu_bh_new(uhci_bh, s); + s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); s->num_ports_vmstate = NB_PORTS; QTAILQ_INIT(&s->queues); diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index 176868d345..f500db85ab 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque) static void usb_host_nodev(USBHostDevice *s) { if (!s->bh_nodev) { - s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s); + s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s, + &DEVICE(s)->mem_reentrancy_guard); } qemu_bh_schedule(s->bh_nodev); } @@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id) USBHostDevice *dev = opaque; if (!dev->bh_postld) { - dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev); + dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev, + &DEVICE(dev)->mem_reentrancy_guard); } qemu_bh_schedule(dev->bh_postld); dev->bh_postld_pending = true; diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index fd7df599bc..39fbaaab16 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp) } } - dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); - dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev); + dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev, + &DEVICE(dev)->mem_reentrancy_guard); + dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev, + &DEVICE(dev)->mem_reentrancy_guard); dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev); packet_id_queue_init(&dev->cancelled, dev, "cancelled"); diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index 66cb3f7c24..38ee660a30 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev) QTAILQ_INIT(&usbif->req_free_q); QSIMPLEQ_INIT(&usbif->hotplug_q); - usbif->bh = qemu_bh_new(usbback_bh, usbif); + usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif, + &DEVICE(xendev)->mem_reentrancy_guard); } static int usbback_free(struct XenLegacyDevice *xendev) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index fd06fcfb3f..d004cf29d2 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -886,8 +886,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) precopy_add_notifier(&s->free_page_hint_notify); object_ref(OBJECT(s->iothread)); - s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), - virtio_ballloon_get_free_page_hints, s); + s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread), + virtio_ballloon_get_free_page_hints, s, + &dev->mem_reentrancy_guard); } if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) { diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index 802e1b9659..2fe804510f 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) vcrypto->vqs[i].dataq = virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); vcrypto->vqs[i].dataq_bh = - qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); + qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], + &dev->mem_reentrancy_guard); vcrypto->vqs[i].vcrypto = vcrypto; } From bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Thu, 27 Apr 2023 17:10:10 -0400 Subject: [PATCH 10/13] lsi53c895a: disable reentrancy detection for script RAM As the code is designed to use the memory APIs to access the script ram, disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion. In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion. Reported-by: Fiona Ebner Signed-off-by: Alexander Bulekov Reviewed-by: Thomas Huth Reviewed-by: Darren Kenny Message-Id: <20230427211013.2994127-6-alxndr@bu.edu> Signed-off-by: Thomas Huth --- hw/scsi/lsi53c895a.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index af93557a9a..db27872963 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, "lsi-io", 256); + /* + * Since we use the address-space API to interact with ram_io, disable the + * re-entrancy guard. + */ + s->ram_io.disable_reentrancy_guard = true; + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); qdev_init_gpio_out(d, &s->ext_irq, 1); From 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Thu, 27 Apr 2023 17:10:11 -0400 Subject: [PATCH 11/13] bcm2835_property: disable reentrancy detection for iomem As the code is designed for re-entrant calls from bcm2835_property to bcm2835_mbox and back into bcm2835_property, mark iomem as reentrancy-safe. Signed-off-by: Alexander Bulekov Reviewed-by: Thomas Huth Message-Id: <20230427211013.2994127-7-alxndr@bu.edu> Signed-off-by: Thomas Huth --- hw/misc/bcm2835_property.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c index 890ae7bae5..de056ea2df 100644 --- a/hw/misc/bcm2835_property.c +++ b/hw/misc/bcm2835_property.c @@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj) memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s, TYPE_BCM2835_PROPERTY, 0x10); + + /* + * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from + * iomem. As such, mark iomem as re-entracy safe. + */ + s->iomem.disable_reentrancy_guard = true; + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq); } From 6dad5a6810d9c60ca320d01276f6133bbcfa1fc7 Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Thu, 27 Apr 2023 17:10:12 -0400 Subject: [PATCH 12/13] raven: disable reentrancy detection for iomem As the code is designed for re-entrant calls from raven_io_ops to pci-conf, mark raven_io_ops as reentrancy-safe. Signed-off-by: Alexander Bulekov Message-Id: <20230427211013.2994127-8-alxndr@bu.edu> Signed-off-by: Thomas Huth --- hw/pci-host/raven.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c index 072ffe3c5e..9a11ac4b2b 100644 --- a/hw/pci-host/raven.c +++ b/hw/pci-host/raven.c @@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj) memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); + /* + * Raven's raven_io_ops use the address-space API to access pci-conf-idx + * (which is also owned by the raven device). As such, mark the + * pci_io_non_contiguous as re-entrancy safe. + */ + s->pci_io_non_contiguous.disable_reentrancy_guard = true; + /* CPU address space */ memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, &s->pci_io); From 50795ee051a342c681a9b45671c552fbd6274db8 Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Thu, 27 Apr 2023 17:10:13 -0400 Subject: [PATCH 13/13] apic: disable reentrancy detection for apic-msi As the code is designed for re-entrant calls to apic-msi, mark apic-msi as reentrancy-safe. Signed-off-by: Alexander Bulekov Reviewed-by: Darren Kenny Message-Id: <20230427211013.2994127-9-alxndr@bu.edu> Signed-off-by: Thomas Huth --- hw/intc/apic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/intc/apic.c b/hw/intc/apic.c index 20b5a94073..ac3d47d231 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", APIC_SPACE_SIZE); + /* + * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can + * write back to apic-msi. As such mark the apic-msi region re-entrancy + * safe. + */ + s->io_memory.disable_reentrancy_guard = true; + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); local_apics[s->id] = s;