From d7beddcc024f90da6375c1694b805fc282c79402 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 28 Jan 2021 17:41:44 +0300 Subject: [PATCH 01/10] MAINTAINERS: Add Vladimir as co-maintainer for Block Jobs I'm developing Qemu backup for several years, and finally new backup architecture, including block-copy generic engine and backup-top filter landed upstream, great thanks to reviewers and especially to Max Reitz! I also have plans of moving other block-jobs onto block-copy, so that we finally have one generic block copying path, fast and well-formed. So, now I suggest to bring all parts of backup architecture into "Block Jobs" subsystem (actually, aio_task is shared with qcow2 and qemu-co-shared-resource can be reused somewhere else, but I'd keep an eye on them in context of block-jobs) and add myself as co-maintainer. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210128144144.27617-1-vsementsov@virtuozzo.com> Reviewed-by: Markus Armbruster Reviewed-by: John Snow Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index bcd88668bc..00626941f1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2210,6 +2210,7 @@ F: scsi/* Block Jobs M: John Snow +M: Vladimir Sementsov-Ogievskiy L: qemu-block@nongnu.org S: Supported F: blockjob.c @@ -2222,7 +2223,16 @@ F: block/commit.c F: block/stream.c F: block/mirror.c F: qapi/job.json +F: block/block-copy.c +F: include/block/block-copy.c +F: block/backup-top.h +F: block/backup-top.c +F: include/block/aio_task.h +F: block/aio_task.c +F: util/qemu-co-shared-resource.c +F: include/qemu/co-shared-resource.h T: git https://gitlab.com/jsnow/qemu.git jobs +T: git https://src.openvz.org/scm/~vsementsov/qemu.git jobs Block QAPI, monitor, command line M: Markus Armbruster From 722d8e73d65cb54f39d360ecb2147ac58f43c399 Mon Sep 17 00:00:00 2001 From: Sergio Lopez Date: Mon, 1 Feb 2021 13:50:31 +0100 Subject: [PATCH 02/10] block: Avoid processing BDS twice in bdrv_set_aio_context_ignore() Some graphs may contain an indirect reference to the first BDS in the chain that can be reached while walking it bottom->up from one its children. Doubling-processing of a BDS is especially problematic for the aio_notifiers, as they might attempt to work on both the old and the new AIO contexts. To avoid this problem, add every child and parent to the ignore list before actually processing them. Suggested-by: Kevin Wolf Signed-off-by: Sergio Lopez Message-Id: <20210201125032.44713-2-slp@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/block.c b/block.c index 91a66d4f3e..5c428e1595 100644 --- a/block.c +++ b/block.c @@ -6439,7 +6439,10 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, AioContext *new_context, GSList **ignore) { AioContext *old_context = bdrv_get_aio_context(bs); - BdrvChild *child; + GSList *children_to_process = NULL; + GSList *parents_to_process = NULL; + GSList *entry; + BdrvChild *child, *parent; g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); @@ -6454,17 +6457,34 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, continue; } *ignore = g_slist_prepend(*ignore, child); - bdrv_set_aio_context_ignore(child->bs, new_context, ignore); + children_to_process = g_slist_prepend(children_to_process, child); } - QLIST_FOREACH(child, &bs->parents, next_parent) { - if (g_slist_find(*ignore, child)) { + + QLIST_FOREACH(parent, &bs->parents, next_parent) { + if (g_slist_find(*ignore, parent)) { continue; } - assert(child->klass->set_aio_ctx); - *ignore = g_slist_prepend(*ignore, child); - child->klass->set_aio_ctx(child, new_context, ignore); + *ignore = g_slist_prepend(*ignore, parent); + parents_to_process = g_slist_prepend(parents_to_process, parent); } + for (entry = children_to_process; + entry != NULL; + entry = g_slist_next(entry)) { + child = entry->data; + bdrv_set_aio_context_ignore(child->bs, new_context, ignore); + } + g_slist_free(children_to_process); + + for (entry = parents_to_process; + entry != NULL; + entry = g_slist_next(entry)) { + parent = entry->data; + assert(parent->klass->set_aio_ctx); + parent->klass->set_aio_ctx(parent, new_context, ignore); + } + g_slist_free(parents_to_process); + bdrv_detach_aio_context(bs); /* Acquire the new context, if necessary */ From 1895b977f9a69419ae45cfc25805f71efae32eaf Mon Sep 17 00:00:00 2001 From: Sergio Lopez Date: Mon, 1 Feb 2021 13:50:32 +0100 Subject: [PATCH 03/10] block: move blk_exp_close_all() to qemu_cleanup() Move blk_exp_close_all() from bdrv_close() to qemu_cleanup(), before bdrv_drain_all_begin(). Export drivers may have coroutines yielding at some point in the block layer, so we need to shut them down before draining the block layer, as otherwise they may get stuck blk_wait_while_drained(). RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1900505 Signed-off-by: Sergio Lopez Message-Id: <20210201125032.44713-3-slp@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 1 - qemu-nbd.c | 1 + softmmu/runstate.c | 9 +++++++++ storage-daemon/qemu-storage-daemon.c | 1 + 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 5c428e1595..4e52b1c588 100644 --- a/block.c +++ b/block.c @@ -4435,7 +4435,6 @@ static void bdrv_close(BlockDriverState *bs) void bdrv_close_all(void) { assert(job_next(NULL) == NULL); - blk_exp_close_all(); /* Drop references from requests still in flight, such as canceled block * jobs whose AIO context has not been polled yet */ diff --git a/qemu-nbd.c b/qemu-nbd.c index 0d513cb38c..608c63e82a 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -503,6 +503,7 @@ static const char *socket_activation_validate_opts(const char *device, static void qemu_nbd_shutdown(void) { job_cancel_sync_all(); + blk_exp_close_all(); bdrv_close_all(); } diff --git a/softmmu/runstate.c b/softmmu/runstate.c index beee050815..a7fcb603f7 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "audio/audio.h" #include "block/block.h" +#include "block/export.h" #include "chardev/char.h" #include "crypto/cipher.h" #include "crypto/init.h" @@ -784,6 +785,14 @@ void qemu_cleanup(void) */ migration_shutdown(); + /* + * Close the exports before draining the block layer. The export + * drivers may have coroutines yielding on it, so we need to clean + * them up before the drain, as otherwise they may be get stuck in + * blk_wait_while_drained(). + */ + blk_exp_close_all(); + /* * We must cancel all block jobs while the block layer is drained, * or cancelling will be affected by throttling and thus may block diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c index e0c87edbdd..d8d172cc60 100644 --- a/storage-daemon/qemu-storage-daemon.c +++ b/storage-daemon/qemu-storage-daemon.c @@ -314,6 +314,7 @@ int main(int argc, char *argv[]) main_loop_wait(false); } + blk_exp_close_all(); bdrv_drain_all_begin(); bdrv_close_all(); From d0bc412302e6f32e65835a83ddefa656c7d7e9ba Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 29 Jan 2021 19:13:23 +0300 Subject: [PATCH 04/10] iotests/297: pylint: ignore too many statements Ignore two complains, which now lead to 297 failure on testenv.py and testrunner.py. Fixes: 2e5a2f57db481f18fcf70be2a36b1417370b8476 Fixes: d74c754c924ca34e90b7c96ce2f5609d82c0e628 Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210129161323.615027-1-vsementsov@virtuozzo.com> Reviewed-by: John Snow Signed-off-by: Kevin Wolf --- tests/qemu-iotests/pylintrc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/qemu-iotests/pylintrc b/tests/qemu-iotests/pylintrc index cd3702e23c..7a6c0a9474 100644 --- a/tests/qemu-iotests/pylintrc +++ b/tests/qemu-iotests/pylintrc @@ -21,6 +21,8 @@ disable=invalid-name, unsubscriptable-object, # These are temporary, and should be removed: missing-docstring, + too-many-return-statements, + too-many-statements [FORMAT] From ca502ca60dab85b11a7fad5978ab3de94c296fb6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 2 Feb 2021 15:28:02 +0100 Subject: [PATCH 05/10] iotests: Revert emulator selection to old behaviour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the qemu-system-{arch} binary for the host architecture can't be found, the old 'check' implementation selected the alphabetically first system emulator binary that it could find. The new Python implementation just uses the first result of glob.iglob(), which has an undefined order. This is a problem that breaks CI because the iotests aren't actually prepared to run on any emulator. They should be, so this is really a bug in the failing test cases that should be fixed there, but as a quick fix, let's revert to the old behaviour to let CI runs succeed again. Signed-off-by: Kevin Wolf Message-Id: <20210202142802.119999-1-kwolf@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Daniel P. Berrangé Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/testenv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py index b31275f518..1fbec854c1 100644 --- a/tests/qemu-iotests/testenv.py +++ b/tests/qemu-iotests/testenv.py @@ -135,7 +135,7 @@ class TestEnv(ContextManager['TestEnv']): if not os.path.exists(self.qemu_prog): pattern = root('qemu-system-*') try: - progs = glob.iglob(pattern) + progs = sorted(glob.iglob(pattern)) self.qemu_prog = next(p for p in progs if isxfile(p)) except StopIteration: sys.exit("Not found any Qemu executable binary by pattern " From 3ae50942f161e2e8fa6ecd69a9c17b681d419905 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 1 Feb 2021 11:50:41 +0300 Subject: [PATCH 06/10] iotests: check: return 1 on failure We should indicate failure by exit code, not only output. Reported-by: Peter Maydell Fixes: f203080bbd9f9e5b31041b1f2afcd6040c5aaec5 Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210201085041.3079-1-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/check | 5 ++++- tests/qemu-iotests/testrunner.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 5190dee82e..d1c87ceaf1 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -140,4 +140,7 @@ if __name__ == '__main__': else: with TestRunner(env, makecheck=args.makecheck, color=args.color) as tr: - tr.run_tests([os.path.join(env.source_iotests, t) for t in tests]) + paths = [os.path.join(env.source_iotests, t) for t in tests] + ok = tr.run_tests(paths) + if not ok: + sys.exit(1) diff --git a/tests/qemu-iotests/testrunner.py b/tests/qemu-iotests/testrunner.py index 24b3fba115..25754e9a09 100644 --- a/tests/qemu-iotests/testrunner.py +++ b/tests/qemu-iotests/testrunner.py @@ -318,7 +318,7 @@ class TestRunner(ContextManager['TestRunner']): return res - def run_tests(self, tests: List[str]) -> None: + def run_tests(self, tests: List[str]) -> bool: n_run = 0 failed = [] notrun = [] @@ -363,5 +363,7 @@ class TestRunner(ContextManager['TestRunner']): if failed: print('Failures:', ' '.join(failed)) print(f'Failed {len(failed)} of {n_run} iotests') + return False else: print(f'Passed all {n_run} iotests') + return True From 0212fa2afd1e20c590f740036f5cb837e1177d53 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 1 Feb 2021 17:10:24 +0100 Subject: [PATCH 07/10] iotests: Fix -makecheck output For -makecheck, the old 'check' implementation skipped the output when starting a test. It only had the condensed output at the end of a test. testrunner.py prints the normal output when starting a test even for -makecheck. This output contains '\r' at the end so that it can be overwritten with the result at the end of the test. However, for -makecheck this is shorter output in a different format, so effectively we end up with garbled output that mixes both output forms. Revert to the old behaviour of only printing a message after the test had completed in -makecheck mode. Fixes: d74c754c924ca34e90b7c96ce2f5609d82c0e628 Signed-off-by: Kevin Wolf Message-Id: <20210201161024.127921-1-kwolf@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- tests/qemu-iotests/testrunner.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/testrunner.py b/tests/qemu-iotests/testrunner.py index 25754e9a09..1fc61fcaa3 100644 --- a/tests/qemu-iotests/testrunner.py +++ b/tests/qemu-iotests/testrunner.py @@ -301,8 +301,10 @@ class TestRunner(ContextManager['TestRunner']): last_el = self.last_elapsed.get(test) start = datetime.datetime.now().strftime('%H:%M:%S') - self.test_print_one_line(test=test, starttime=start, lasttime=last_el, - end='\r', test_field_width=test_field_width) + if not self.makecheck: + self.test_print_one_line(test=test, starttime=start, + lasttime=last_el, end='\r', + test_field_width=test_field_width) res = self.do_run_test(test) From 97b709f32ed10a6cd63c3a41755245ec323eedbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 27 Jan 2021 22:21:36 +0100 Subject: [PATCH 08/10] block/nvme: Properly display doorbell stride length in trace event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 15b2260bef3 ("block/nvme: Trace controller capabilities") misunderstood the doorbell stride value from the datasheet, use the correct one. The 'doorbell_scale' variable used few lines later is correct. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20210127212137.3482291-2-philmd@redhat.com> Reviewed-by: Klaus Jensen Signed-off-by: Kevin Wolf --- block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/nvme.c b/block/nvme.c index 5a6fbacf4a..80c4318d8f 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -745,7 +745,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, trace_nvme_controller_capability("Contiguous Queues Required", NVME_CAP_CQR(cap)); trace_nvme_controller_capability("Doorbell Stride", - 2 << (2 + NVME_CAP_DSTRD(cap))); + 1 << (2 + NVME_CAP_DSTRD(cap))); trace_nvme_controller_capability("Subsystem Reset Supported", NVME_CAP_NSSRS(cap)); trace_nvme_controller_capability("Memory Page Size Minimum", From fcc8672aca2902055c03ef79e4c9fcfad0227b70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 27 Jan 2021 22:21:37 +0100 Subject: [PATCH 09/10] block/nvme: Trace NVMe spec version supported by the controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVMe controllers implement different versions of the spec, and different features of it. It is useful to gather this information when debugging. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20210127212137.3482291-3-philmd@redhat.com> Reviewed-by: Klaus Jensen Signed-off-by: Kevin Wolf --- block/nvme.c | 6 ++++++ block/trace-events | 1 + 2 files changed, 7 insertions(+) diff --git a/block/nvme.c b/block/nvme.c index 80c4318d8f..2b5421e7aa 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -708,6 +708,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, AioContext *aio_context = bdrv_get_aio_context(bs); int ret; uint64_t cap; + uint32_t ver; uint64_t timeout_ms; uint64_t deadline, now; volatile NvmeBar *regs = NULL; @@ -764,6 +765,11 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, bs->bl.request_alignment = s->page_size; timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000); + ver = le32_to_cpu(regs->vs); + trace_nvme_controller_spec_version(extract32(ver, 16, 16), + extract32(ver, 8, 8), + extract32(ver, 0, 8)); + /* Reset device to get a clean state. */ regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE); /* Wait for CSTS.RDY = 0. */ diff --git a/block/trace-events b/block/trace-events index 8368f4acb0..ecbc32a80a 100644 --- a/block/trace-events +++ b/block/trace-events @@ -136,6 +136,7 @@ qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s # nvme.c nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64 nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64 +nvme_controller_spec_version(uint32_t mjr, uint32_t mnr, uint32_t ter) "Specification supported: %u.%u.%u" nvme_kick(void *s, unsigned q_index) "s %p q #%u" nvme_dma_flush_queue_wait(void *s) "s %p" nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x" From 26513a01741f51650f5dd716681995359794ba6f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 2 Feb 2021 16:59:11 +0100 Subject: [PATCH 10/10] block: Fix VM size column width in bdrv_snapshot_dump() size_to_str() can return a size like "4.24 MiB", with a single digit integer part and two fractional digits. This is eight characters, but commit b39847a5 changed the format string to only reserve seven characters for the column. This can result in unaligned columns, which in turn changes the output of iotests case 267 because exceeding the column size defeats the attempt to filter the size out of the output (observed with the ppc64 emulator). The resulting change is only a whitespace change, but since commit f203080b this is enough for iotests to consider the test failed. Taking a character away from the tag name column and adding it to the VM size column doesn't change anything in the common case (the tag name is left justified, the VM size is right justified), but fixes this case. Fixes: b39847a50553b7679d6d7fefbe6a108a17aacf8d Signed-off-by: Kevin Wolf Message-Id: <20210202155911.179865-1-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qapi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/qapi.c b/block/qapi.c index 0a96099e36..84a0aadc09 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -677,7 +677,7 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn) char *sizing = NULL; if (!sn) { - qemu_printf("%-10s%-18s%7s%20s%13s%11s", + qemu_printf("%-10s%-17s%8s%20s%13s%11s", "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK", "ICOUNT"); } else { ti = sn->date_sec; @@ -696,7 +696,7 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn) snprintf(icount_buf, sizeof(icount_buf), "%"PRId64, sn->icount); } - qemu_printf("%-9s %-17s %7s%20s%13s%11s", + qemu_printf("%-9s %-16s %8s%20s%13s%11s", sn->id_str, sn->name, sizing, date_buf,