From b4e44c9944e19c8bfc7fbf0c4a6a5e48f3ba3dc0 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 19 May 2020 15:30:41 +0200 Subject: [PATCH 01/11] io_uring: retry io_uring_submit() if it fails with errno=EINTR As recently documented [1], io_uring_enter(2) syscall can return an error (errno=EINTR) if the operation was interrupted by a delivery of a signal before it could complete. This should happen when IORING_ENTER_GETEVENTS flag is used, for example during io_uring_submit_and_wait() or during io_uring_submit() when IORING_SETUP_IOPOLL is enabled. We shouldn't have this problem for now, but it's better to prevent it. [1] https://github.com/axboe/liburing/commit/344355ec6619de8f4e64584c9736530b5346e4f4 Signed-off-by: Stefano Garzarella Message-id: 20200519133041.112138-1-sgarzare@redhat.com Signed-off-by: Stefan Hajnoczi --- block/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/io_uring.c b/block/io_uring.c index a3142ca989..9765681f7c 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -231,7 +231,7 @@ static int ioq_submit(LuringState *s) trace_luring_io_uring_submit(s, ret); /* Prevent infinite loop if submission is refused */ if (ret <= 0) { - if (ret == -EAGAIN) { + if (ret == -EAGAIN || ret == -EINTR) { continue; } break; From 769335ecb1e8fd9c4317bdff7cfd0f84af7ab2f9 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 19 May 2020 15:49:42 +0200 Subject: [PATCH 02/11] io_uring: use io_uring_cq_ready() to check for ready cqes In qemu_luring_poll_cb() we are not using the cqe peeked from the CQ ring. We are using io_uring_peek_cqe() only to see if there are cqes ready, so we can replace it with io_uring_cq_ready(). 
Signed-off-by: Stefano Garzarella Message-id: 20200519134942.118178-1-sgarzare@redhat.com Signed-off-by: Stefan Hajnoczi --- block/io_uring.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/block/io_uring.c b/block/io_uring.c index 9765681f7c..037af09471 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -277,13 +277,10 @@ static void qemu_luring_completion_cb(void *opaque) static bool qemu_luring_poll_cb(void *opaque) { LuringState *s = opaque; - struct io_uring_cqe *cqes; - if (io_uring_peek_cqe(&s->ring, &cqes) == 0) { - if (cqes) { - luring_process_completions_and_submit(s); - return true; - } + if (io_uring_cq_ready(&s->ring)) { + luring_process_completions_and_submit(s); + return true; } return false; From 7a071a96d3ef48095892c1d1075c0181c8940058 Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Mon, 11 May 2020 23:01:30 -0400 Subject: [PATCH 03/11] fuzz: add datadir for oss-fuzz compatibility This allows us to keep pc-bios in executable_dir/pc-bios, rather than executable_dir/../pc-bios, which is incompatible with oss-fuzz's file structure. 
Signed-off-by: Alexander Bulekov Reviewed-by: Darren Kenny Message-id: 20200512030133.29896-2-alxndr@bu.edu Signed-off-by: Stefan Hajnoczi --- include/sysemu/sysemu.h | 2 ++ softmmu/vl.c | 2 +- tests/qtest/fuzz/fuzz.c | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 26c0c802d1..3efccdba7e 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -15,6 +15,8 @@ extern const char *qemu_name; extern QemuUUID qemu_uuid; extern bool qemu_uuid_set; +void qemu_add_data_dir(const char *path); + void qemu_add_exit_notifier(Notifier *notify); void qemu_remove_exit_notifier(Notifier *notify); diff --git a/softmmu/vl.c b/softmmu/vl.c index ae5451bc23..05d1a4cb6b 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -1993,7 +1993,7 @@ char *qemu_find_file(int type, const char *name) return NULL; } -static void qemu_add_data_dir(const char *path) +void qemu_add_data_dir(const char *path) { int i; diff --git a/tests/qtest/fuzz/fuzz.c b/tests/qtest/fuzz/fuzz.c index f5c923852e..33365c3782 100644 --- a/tests/qtest/fuzz/fuzz.c +++ b/tests/qtest/fuzz/fuzz.c @@ -137,6 +137,7 @@ int LLVMFuzzerInitialize(int *argc, char ***argv, char ***envp) { char *target_name; + char *dir; /* Initialize qgraph and modules */ qos_graph_init(); @@ -147,6 +148,20 @@ int LLVMFuzzerInitialize(int *argc, char ***argv, char ***envp) target_name = strstr(**argv, "-target-"); if (target_name) { /* The binary name specifies the target */ target_name += strlen("-target-"); + /* + * With oss-fuzz, the executable is kept in the root of a directory (we + * cannot assume the path). All data (including bios binaries) must be + * in the same dir, or a subdir. Thus, we cannot place the pc-bios so + * that it would be in exec_dir/../pc-bios. + * As a workaround, oss-fuzz allows us to use argv[0] to get the + * location of the executable. Using this we add exec_dir/pc-bios to + * the datadirs. 
+ */ + dir = g_build_filename(g_path_get_dirname(**argv), "pc-bios", NULL); + if (g_file_test(dir, G_FILE_TEST_IS_DIR)) { + qemu_add_data_dir(dir); + } + g_free(dir); } else if (*argc > 1) { /* The target is specified as an argument */ target_name = (*argv)[1]; if (!strstr(target_name, "--fuzz-target=")) { From 6851803a467238ed39408e35b5f2063c1370b156 Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Mon, 11 May 2020 23:01:31 -0400 Subject: [PATCH 04/11] fuzz: fix typo in i440fx-qtest-reboot arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Alexander Bulekov Reviewed-by: Darren Kenny Reviewed-by: Philippe Mathieu-Daudé Message-id: 20200512030133.29896-3-alxndr@bu.edu Signed-off-by: Stefan Hajnoczi --- tests/qtest/fuzz/i440fx_fuzz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qtest/fuzz/i440fx_fuzz.c b/tests/qtest/fuzz/i440fx_fuzz.c index bcd6769b4c..775b3041ba 100644 --- a/tests/qtest/fuzz/i440fx_fuzz.c +++ b/tests/qtest/fuzz/i440fx_fuzz.c @@ -156,7 +156,7 @@ static void i440fx_fuzz_qos_fork(QTestState *s, } static const char *i440fx_qtest_argv = TARGET_NAME " -machine accel=qtest" - "-m 0 -display none"; + " -m 0 -display none"; static const char *i440fx_argv(FuzzTarget *t) { return i440fx_qtest_argv; From 3b113229c5d5477d34f54fce0a3e8781090c93b6 Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Mon, 11 May 2020 23:01:32 -0400 Subject: [PATCH 05/11] fuzz: add mangled object name to linker script Previously, we relied on "FuzzerTracePC*(.bss*)" to place libfuzzer's fuzzer::TPC object into our contiguous shared-memory region. 
This does not work for some libfuzzer builds, so this addition identifies the region by its mangled name: *(.bss._ZN6fuzzer3TPCE); Signed-off-by: Alexander Bulekov Reviewed-by: Darren Kenny Message-id: 20200512030133.29896-4-alxndr@bu.edu Signed-off-by: Stefan Hajnoczi --- tests/qtest/fuzz/fork_fuzz.ld | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/qtest/fuzz/fork_fuzz.ld b/tests/qtest/fuzz/fork_fuzz.ld index e086bba873..bfb667ed06 100644 --- a/tests/qtest/fuzz/fork_fuzz.ld +++ b/tests/qtest/fuzz/fork_fuzz.ld @@ -28,6 +28,11 @@ SECTIONS /* Internal Libfuzzer TracePC object which contains the ValueProfileMap */ FuzzerTracePC*(.bss*); + /* + * In case the above line fails, explicitly specify the (mangled) name of + * the object we care about + */ + *(.bss._ZN6fuzzer3TPCE); } .data.fuzz_end : ALIGN(4K) { From dfd5ddb5680511a2aa5576d8ed01ff214cc0fc03 Mon Sep 17 00:00:00 2001 From: Alexander Bulekov Date: Mon, 11 May 2020 23:01:33 -0400 Subject: [PATCH 06/11] fuzz: run the main-loop in fork-server process Without this, the time since the last main-loop keeps increasing, as the fuzzer runs. The forked children need to handle all the "past-due" timers, slowing them down, over time. With this change, the parent/fork-server process runs the main-loop, while waiting on the child, ensuring that the timer events do not pile up, over time. 
Signed-off-by: Alexander Bulekov Reviewed-by: Darren Kenny Message-id: 20200512030133.29896-5-alxndr@bu.edu Signed-off-by: Stefan Hajnoczi --- tests/qtest/fuzz/i440fx_fuzz.c | 1 + tests/qtest/fuzz/virtio_net_fuzz.c | 2 ++ tests/qtest/fuzz/virtio_scsi_fuzz.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/tests/qtest/fuzz/i440fx_fuzz.c b/tests/qtest/fuzz/i440fx_fuzz.c index 775b3041ba..e2f31e56f9 100644 --- a/tests/qtest/fuzz/i440fx_fuzz.c +++ b/tests/qtest/fuzz/i440fx_fuzz.c @@ -151,6 +151,7 @@ static void i440fx_fuzz_qos_fork(QTestState *s, i440fx_fuzz_qos(s, Data, Size); _Exit(0); } else { + flush_events(s); wait(NULL); } } diff --git a/tests/qtest/fuzz/virtio_net_fuzz.c b/tests/qtest/fuzz/virtio_net_fuzz.c index d08a47e278..a33bd73067 100644 --- a/tests/qtest/fuzz/virtio_net_fuzz.c +++ b/tests/qtest/fuzz/virtio_net_fuzz.c @@ -122,6 +122,7 @@ static void virtio_net_fork_fuzz(QTestState *s, flush_events(s); _Exit(0); } else { + flush_events(s); wait(NULL); } } @@ -134,6 +135,7 @@ static void virtio_net_fork_fuzz_check_used(QTestState *s, flush_events(s); _Exit(0); } else { + flush_events(s); wait(NULL); } } diff --git a/tests/qtest/fuzz/virtio_scsi_fuzz.c b/tests/qtest/fuzz/virtio_scsi_fuzz.c index 3b95247f12..51dce491ab 100644 --- a/tests/qtest/fuzz/virtio_scsi_fuzz.c +++ b/tests/qtest/fuzz/virtio_scsi_fuzz.c @@ -145,6 +145,7 @@ static void virtio_scsi_fork_fuzz(QTestState *s, flush_events(s); _Exit(0); } else { + flush_events(s); wait(NULL); } } @@ -164,6 +165,7 @@ static void virtio_scsi_with_flag_fuzz(QTestState *s, } _Exit(0); } else { + flush_events(s); wait(NULL); } } From 4dfe59d187d9b218efca8d89c0c2fac1298d8712 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 8 May 2020 08:24:53 +0200 Subject: [PATCH 07/11] memory: Rename memory_region_do_writeback -> memory_region_writeback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We usually use '_do_' for internal functions. 
Rename memory_region_do_writeback() as memory_region_writeback(). Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Reviewed-by: Richard Henderson Acked-by: Paolo Bonzini Message-id: 20200508062456.23344-2-philmd@redhat.com Signed-off-by: Stefan Hajnoczi --- include/exec/memory.h | 4 ++-- memory.c | 2 +- target/arm/helper.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index e000bd2f97..1b7cfdd5b6 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1474,14 +1474,14 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr); void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp); /** - * memory_region_do_writeback: Trigger cache writeback or msync for + * memory_region_writeback: Trigger cache writeback or msync for * selected address range * * @mr: the memory region to be updated * @addr: the initial address of the range to be written back * @size: the size of the range to be written back */ -void memory_region_do_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size); +void memory_region_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size); /** * memory_region_set_log: Turn dirty logging on or off for a region. 
diff --git a/memory.c b/memory.c index fd6f3d6aca..bb84334e1e 100644 --- a/memory.c +++ b/memory.c @@ -2198,7 +2198,7 @@ void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp } -void memory_region_do_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size) +void memory_region_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size) { /* * Might be extended case needed to cover diff --git a/target/arm/helper.c b/target/arm/helper.c index a92ae55672..972a766730 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6813,7 +6813,7 @@ static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, mr = memory_region_from_host(haddr, &offset); if (mr) { - memory_region_do_writeback(mr, offset, dline_size); + memory_region_writeback(mr, offset, dline_size); } } } From 9ecc996a3d39bdbf64a488936f97a9496b74ebd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 8 May 2020 08:24:54 +0200 Subject: [PATCH 08/11] memory: Extract memory_region_msync() from memory_region_writeback() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suggested-by: Paolo Bonzini Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Reviewed-by: Richard Henderson Acked-by: Paolo Bonzini Message-id: 20200508062456.23344-3-philmd@redhat.com Signed-off-by: Stefan Hajnoczi --- include/exec/memory.h | 13 ++++++++++++- memory.c | 10 ++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 1b7cfdd5b6..3e00cdbbfa 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1473,8 +1473,19 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr); */ void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp); + /** - * memory_region_writeback: Trigger cache writeback or msync for + * memory_region_msync: Synchronize selected address range of + * a memory mapped region + * + * @mr: the memory 
region to be msync + * @addr: the initial address of the range to be sync + * @size: the size of the range to be sync + */ +void memory_region_msync(MemoryRegion *mr, hwaddr addr, hwaddr size); + +/** + * memory_region_writeback: Trigger cache writeback for * selected address range * * @mr: the memory region to be updated diff --git a/memory.c b/memory.c index bb84334e1e..93febe4759 100644 --- a/memory.c +++ b/memory.c @@ -2197,6 +2197,12 @@ void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp qemu_ram_resize(mr->ram_block, newsize, errp); } +void memory_region_msync(MemoryRegion *mr, hwaddr addr, hwaddr size) +{ + if (mr->ram_block) { + qemu_ram_writeback(mr->ram_block, addr, size); + } +} void memory_region_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size) { @@ -2204,8 +2210,8 @@ void memory_region_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size) * Might be extended case needed to cover * different types of memory regions */ - if (mr->ram_block && mr->dirty_log_mask) { - qemu_ram_writeback(mr->ram_block, addr, size); + if (mr->dirty_log_mask) { + memory_region_msync(mr, addr, size); } } From bc2a2364b8050632a3b3de07f30d88b7f0734845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 8 May 2020 08:24:55 +0200 Subject: [PATCH 09/11] hw/block: Let the NVMe emulated device be target-agnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the non-target-specific memory_region_msync() function is available, use it to make this device target-agnostic. 
Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Reviewed-by: Richard Henderson Acked-by: Paolo Bonzini Message-id: 20200508062456.23344-4-philmd@redhat.com Signed-off-by: Stefan Hajnoczi --- hw/block/Makefile.objs | 2 +- hw/block/nvme.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs index 47960b5f0d..8855c22656 100644 --- a/hw/block/Makefile.objs +++ b/hw/block/Makefile.objs @@ -13,6 +13,6 @@ common-obj-$(CONFIG_SH4) += tc58128.o obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o -obj-$(CONFIG_NVME_PCI) += nvme.o +common-obj-$(CONFIG_NVME_PCI) += nvme.o obj-y += dataplane/ diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 2f3100e56c..a21eeca2fb 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -46,8 +46,7 @@ #include "qapi/visitor.h" #include "sysemu/hostmem.h" #include "sysemu/block-backend.h" -#include "exec/ram_addr.h" - +#include "exec/memory.h" #include "qemu/log.h" #include "qemu/module.h" #include "qemu/cutils.h" @@ -1207,8 +1206,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) */ if (addr == 0xE08 && (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { - qemu_ram_writeback(n->pmrdev->mr.ram_block, - 0, n->pmrdev->size); + memory_region_msync(&n->pmrdev->mr, 0, n->pmrdev->size); } memcpy(&val, ptr + addr, size); } else { From ab7e41e6679224e5ad8da6d70ed7e645a5a482ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 8 May 2020 08:24:56 +0200 Subject: [PATCH 10/11] exec: Rename qemu_ram_writeback() as qemu_ram_msync() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename qemu_ram_writeback() as qemu_ram_msync() to better match what it does. 
Suggested-by: Stefan Hajnoczi Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Reviewed-by: Richard Henderson Acked-by: Paolo Bonzini Message-id: 20200508062456.23344-5-philmd@redhat.com Signed-off-by: Stefan Hajnoczi --- exec.c | 2 +- include/exec/ram_addr.h | 4 ++-- memory.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/exec.c b/exec.c index 5162f0d12f..3d4c94a9dc 100644 --- a/exec.c +++ b/exec.c @@ -2127,7 +2127,7 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp) * Otherwise no-op. * @Note: this is supposed to be a synchronous op. */ -void qemu_ram_writeback(RAMBlock *block, ram_addr_t start, ram_addr_t length) +void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length) { /* The requested range should fit in within the block range */ g_assert((start + length) <= block->used_length); diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 5e59a3d8d7..b295f6a784 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -136,12 +136,12 @@ void qemu_ram_free(RAMBlock *block); int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp); -void qemu_ram_writeback(RAMBlock *block, ram_addr_t start, ram_addr_t length); +void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length); /* Clear whole block of mem */ static inline void qemu_ram_block_writeback(RAMBlock *block) { - qemu_ram_writeback(block, 0, block->used_length); + qemu_ram_msync(block, 0, block->used_length); } #define DIRTY_CLIENTS_ALL ((1 << DIRTY_MEMORY_NUM) - 1) diff --git a/memory.c b/memory.c index 93febe4759..91ceaf9fcf 100644 --- a/memory.c +++ b/memory.c @@ -2200,7 +2200,7 @@ void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp void memory_region_msync(MemoryRegion *mr, hwaddr addr, hwaddr size) { if (mr->ram_block) { - qemu_ram_writeback(mr->ram_block, addr, size); + qemu_ram_msync(mr->ram_block, addr, size); } } From 
7d2410cea154bf915fb30179ebda3b17ac36e70e Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 20 May 2020 17:49:01 +0300 Subject: [PATCH 11/11] block: Factor out bdrv_run_co() We have a few bdrv_*() functions that can either spawn a new coroutine and wait for it with BDRV_POLL_WHILE() or use a fastpath if they are already running in a coroutine. All of them duplicate basically the same code. Factor the common code into a new function bdrv_run_co(). Signed-off-by: Kevin Wolf Signed-off-by: Vladimir Sementsov-Ogievskiy Message-id: 20200520144901.16589-1-vsementsov@virtuozzo.com [Factor out bdrv_run_co_entry too] Signed-off-by: Stefan Hajnoczi --- block/io.c | 193 ++++++++++++++++++++--------------------------------- 1 file changed, 72 insertions(+), 121 deletions(-) diff --git a/block/io.c b/block/io.c index 121ce17a49..df8f2a98d4 100644 --- a/block/io.c +++ b/block/io.c @@ -35,8 +35,6 @@ #include "qemu/main-loop.h" #include "sysemu/replay.h" -#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ - /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) @@ -891,29 +889,63 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, return 0; } +typedef int coroutine_fn BdrvRequestEntry(void *opaque); +typedef struct BdrvRunCo { + BdrvRequestEntry *entry; + void *opaque; + int ret; + bool done; + Coroutine *co; /* Coroutine, running bdrv_run_co_entry, for debugging */ +} BdrvRunCo; + +static void coroutine_fn bdrv_run_co_entry(void *opaque) +{ + BdrvRunCo *arg = opaque; + + arg->ret = arg->entry(arg->opaque); + arg->done = true; + aio_wait_kick(); +} + +static int bdrv_run_co(BlockDriverState *bs, BdrvRequestEntry *entry, + void *opaque) +{ + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + return entry(opaque); + } else { + BdrvRunCo s = { .entry = entry, .opaque = opaque }; + + s.co = 
qemu_coroutine_create(bdrv_run_co_entry, &s); + bdrv_coroutine_enter(bs, s.co); + + BDRV_POLL_WHILE(bs, !s.done); + + return s.ret; + } +} + typedef struct RwCo { BdrvChild *child; int64_t offset; QEMUIOVector *qiov; bool is_write; - int ret; BdrvRequestFlags flags; } RwCo; -static void coroutine_fn bdrv_rw_co_entry(void *opaque) +static int coroutine_fn bdrv_rw_co_entry(void *opaque) { RwCo *rwco = opaque; if (!rwco->is_write) { - rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset, - rwco->qiov->size, rwco->qiov, - rwco->flags); + return bdrv_co_preadv(rwco->child, rwco->offset, + rwco->qiov->size, rwco->qiov, + rwco->flags); } else { - rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset, - rwco->qiov->size, rwco->qiov, - rwco->flags); + return bdrv_co_pwritev(rwco->child, rwco->offset, + rwco->qiov->size, rwco->qiov, + rwco->flags); } - aio_wait_kick(); } /* @@ -923,25 +955,15 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset, QEMUIOVector *qiov, bool is_write, BdrvRequestFlags flags) { - Coroutine *co; RwCo rwco = { .child = child, .offset = offset, .qiov = qiov, .is_write = is_write, - .ret = NOT_DONE, .flags = flags, }; - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_rw_co_entry(&rwco); - } else { - co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco); - bdrv_coroutine_enter(child->bs, co); - BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE); - } - return rwco.ret; + return bdrv_run_co(child->bs, bdrv_rw_co_entry, &rwco); } int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, @@ -2229,8 +2251,6 @@ typedef struct BdrvCoBlockStatusData { int64_t *pnum; int64_t *map; BlockDriverState **file; - int ret; - bool done; } BdrvCoBlockStatusData; int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs, @@ -2484,16 +2504,14 @@ static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, } /* Coroutine wrapper for bdrv_block_status_above() */ -static void coroutine_fn 
bdrv_block_status_above_co_entry(void *opaque) +static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque) { BdrvCoBlockStatusData *data = opaque; - data->ret = bdrv_co_block_status_above(data->bs, data->base, - data->want_zero, - data->offset, data->bytes, - data->pnum, data->map, data->file); - data->done = true; - aio_wait_kick(); + return bdrv_co_block_status_above(data->bs, data->base, + data->want_zero, + data->offset, data->bytes, + data->pnum, data->map, data->file); } /* @@ -2508,7 +2526,6 @@ static int bdrv_common_block_status_above(BlockDriverState *bs, int64_t *map, BlockDriverState **file) { - Coroutine *co; BdrvCoBlockStatusData data = { .bs = bs, .base = base, @@ -2518,18 +2535,9 @@ static int bdrv_common_block_status_above(BlockDriverState *bs, .pnum = pnum, .map = map, .file = file, - .done = false, }; - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_block_status_above_co_entry(&data); - } else { - co = qemu_coroutine_create(bdrv_block_status_above_co_entry, &data); - bdrv_coroutine_enter(bs, co); - BDRV_POLL_WHILE(bs, !data.done); - } - return data.ret; + return bdrv_run_co(bs, bdrv_block_status_above_co_entry, &data); } int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, @@ -2630,7 +2638,6 @@ typedef struct BdrvVmstateCo { QEMUIOVector *qiov; int64_t pos; bool is_read; - int ret; } BdrvVmstateCo; static int coroutine_fn @@ -2658,33 +2665,25 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, return ret; } -static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque) +static int coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque) { BdrvVmstateCo *co = opaque; - co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read); - aio_wait_kick(); + + return bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read); } static inline int bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, bool is_read) { - if (qemu_in_coroutine()) 
{ - return bdrv_co_rw_vmstate(bs, qiov, pos, is_read); - } else { - BdrvVmstateCo data = { - .bs = bs, - .qiov = qiov, - .pos = pos, - .is_read = is_read, - .ret = -EINPROGRESS, - }; - Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data); + BdrvVmstateCo data = { + .bs = bs, + .qiov = qiov, + .pos = pos, + .is_read = is_read, + }; - bdrv_coroutine_enter(bs, co); - BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS); - return data.ret; - } + return bdrv_run_co(bs, bdrv_co_rw_vmstate_entry, &data); } int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, @@ -2762,18 +2761,9 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb) /**************************************************************/ /* Coroutine block device emulation */ -typedef struct FlushCo { - BlockDriverState *bs; - int ret; -} FlushCo; - - -static void coroutine_fn bdrv_flush_co_entry(void *opaque) +static int coroutine_fn bdrv_flush_co_entry(void *opaque) { - FlushCo *rwco = opaque; - - rwco->ret = bdrv_co_flush(rwco->bs); - aio_wait_kick(); + return bdrv_co_flush(opaque); } int coroutine_fn bdrv_co_flush(BlockDriverState *bs) @@ -2890,36 +2880,20 @@ early_exit: int bdrv_flush(BlockDriverState *bs) { - Coroutine *co; - FlushCo flush_co = { - .bs = bs, - .ret = NOT_DONE, - }; - - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_flush_co_entry(&flush_co); - } else { - co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co); - bdrv_coroutine_enter(bs, co); - BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE); - } - - return flush_co.ret; + return bdrv_run_co(bs, bdrv_flush_co_entry, bs); } typedef struct DiscardCo { BdrvChild *child; int64_t offset; int64_t bytes; - int ret; } DiscardCo; -static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque) + +static int coroutine_fn bdrv_pdiscard_co_entry(void *opaque) { DiscardCo *rwco = opaque; - rwco->ret = bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes); - aio_wait_kick(); + return 
bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes); } int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, @@ -3038,24 +3012,13 @@ out: int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes) { - Coroutine *co; DiscardCo rwco = { .child = child, .offset = offset, .bytes = bytes, - .ret = NOT_DONE, }; - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_pdiscard_co_entry(&rwco); - } else { - co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco); - bdrv_coroutine_enter(child->bs, co); - BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE); - } - - return rwco.ret; + return bdrv_run_co(child->bs, bdrv_pdiscard_co_entry, &rwco); } int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf) @@ -3463,21 +3426,19 @@ typedef struct TruncateCo { PreallocMode prealloc; BdrvRequestFlags flags; Error **errp; - int ret; } TruncateCo; -static void coroutine_fn bdrv_truncate_co_entry(void *opaque) +static int coroutine_fn bdrv_truncate_co_entry(void *opaque) { TruncateCo *tco = opaque; - tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact, - tco->prealloc, tco->flags, tco->errp); - aio_wait_kick(); + + return bdrv_co_truncate(tco->child, tco->offset, tco->exact, + tco->prealloc, tco->flags, tco->errp); } int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) { - Coroutine *co; TruncateCo tco = { .child = child, .offset = offset, @@ -3485,17 +3446,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, .prealloc = prealloc, .flags = flags, .errp = errp, - .ret = NOT_DONE, }; - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_truncate_co_entry(&tco); - } else { - co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco); - bdrv_coroutine_enter(child->bs, co); - BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE); - } - - return tco.ret; + return bdrv_run_co(child->bs, bdrv_truncate_co_entry, 
&tco); }