From c1c1f6cf511496b985cb9a1c536d59c9be7b9317 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 29 Mar 2021 17:01:28 +0200 Subject: [PATCH 01/10] block/rbd: fix memory leak in qemu_rbd_connect() In qemu_rbd_connect(), 'mon_host' is allocated by qemu_rbd_mon_host() using g_strjoinv(), but it's only freed in the error path, leaking memory in the success path as reported by valgrind: 80 bytes in 4 blocks are definitely lost in loss record 5,028 of 6,516 at 0x4839809: malloc (vg_replace_malloc.c:307) by 0x5315BB8: g_malloc (in /usr/lib64/libglib-2.0.so.0.6600.8) by 0x532B6FF: g_strjoinv (in /usr/lib64/libglib-2.0.so.0.6600.8) by 0x87D07E: qemu_rbd_mon_host (rbd.c:538) by 0x87D07E: qemu_rbd_connect (rbd.c:562) by 0x87E1CE: qemu_rbd_open (rbd.c:740) by 0x840EB1: bdrv_open_driver (block.c:1528) by 0x8453A9: bdrv_open_common (block.c:1802) by 0x8453A9: bdrv_open_inherit (block.c:3444) by 0x8464C2: bdrv_open (block.c:3537) by 0x8108CD: qmp_blockdev_add (blockdev.c:3569) by 0x8EA61B: qmp_marshal_blockdev_add (qapi-commands-block-core.c:1086) by 0x90B528: do_qmp_dispatch_bh (qmp-dispatch.c:131) by 0x907EA4: aio_bh_poll (async.c:164) Fix freeing 'mon_host' also when qemu_rbd_connect() ends correctly. Fixes: 0a55679b4a5061f4d74bdb1a0e81611ba3390b00 Signed-off-by: Stefano Garzarella Message-Id: <20210329150129.121182-2-sgarzare@redhat.com> Reviewed-by: Markus Armbruster Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block/rbd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index 9071a00e3f..24cefcd0dc 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -563,13 +563,13 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, if (local_err) { error_propagate(errp, local_err); r = -EINVAL; - goto failed_opts; + goto out; } r = rados_create(cluster, opts->user); if (r < 0) { error_setg_errno(errp, -r, "error initializing"); - goto failed_opts; + goto out; } /* try default location when conf=NULL, but ignore failure */ @@ -626,11 +626,12 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, */ rados_ioctx_set_namespace(*io_ctx, opts->q_namespace); - return 0; + r = 0; + goto out; failed_shutdown: rados_shutdown(*cluster); -failed_opts: +out: g_free(mon_host); return r; } From b084b420d9d6347dede328fbcf18c8e4c695f7e8 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 29 Mar 2021 17:01:29 +0200 Subject: [PATCH 02/10] block/rbd: fix memory leak in qemu_rbd_co_create_opts() When we allocate 'q_namespace', we forgot to set 'has_q_namespace' to true. This can cause several issues, including a memory leak, since qapi_free_BlockdevCreateOptions() does not deallocate that memory, as reported by valgrind: 13 bytes in 1 blocks are definitely lost in loss record 7 of 96 at 0x4839809: malloc (vg_replace_malloc.c:307) by 0x48CEBB8: g_malloc (in /usr/lib64/libglib-2.0.so.0.6600.8) by 0x48E3FE3: g_strdup (in /usr/lib64/libglib-2.0.so.0.6600.8) by 0x180010: qemu_rbd_co_create_opts (rbd.c:446) by 0x1AE72C: bdrv_create_co_entry (block.c:492) by 0x241902: coroutine_trampoline (coroutine-ucontext.c:173) by 0x57530AF: ??? (in /usr/lib64/libc-2.32.so) by 0x1FFEFFFA6F: ??? Fix setting 'has_q_namespace' to true when we allocate 'q_namespace'. Fixes: 19ae9ae014 ("block/rbd: Add support for ceph namespaces") Signed-off-by: Stefano Garzarella Message-Id: <20210329150129.121182-3-sgarzare@redhat.com> Reviewed-by: Markus Armbruster Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block/rbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/rbd.c b/block/rbd.c index 24cefcd0dc..f098a89c7b 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -444,6 +444,7 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, loc->user = g_strdup(qdict_get_try_str(options, "user")); loc->has_user = !!loc->user; loc->q_namespace = g_strdup(qdict_get_try_str(options, "namespace")); + loc->has_q_namespace = !!loc->q_namespace; loc->image = g_strdup(qdict_get_try_str(options, "image")); keypairs = qdict_get_try_str(options, "=keyvalue-pairs"); From 66f18320f751f9649e0f230e814dd556e38bc1fe Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 1 Apr 2021 15:28:39 +0200 Subject: [PATCH 03/10] iotests/qsd-jobs: Filter events in the first test The job may or may not be ready before the 'quit' is issued. Whether it is is irrelevant; for the purpose of the test, it only needs to still be there. Filter the job status change and READY events from the output so it becomes reliable. Reported-by: Peter Maydell Suggested-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Max Reitz Message-Id: <20210401132839.139939-1-mreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- tests/qemu-iotests/tests/qsd-jobs | 5 ++++- tests/qemu-iotests/tests/qsd-jobs.out | 10 ---------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/tests/qemu-iotests/tests/qsd-jobs b/tests/qemu-iotests/tests/qsd-jobs index 972b6b3898..510bf0a9dc 100755 --- a/tests/qemu-iotests/tests/qsd-jobs +++ b/tests/qemu-iotests/tests/qsd-jobs @@ -52,9 +52,12 @@ echo "=== Job still present at shutdown ===" echo # Just make sure that this doesn't crash +# (Filter job status and READY events, because their order may differ +# between runs, particularly around when 'quit' is issued) $QSD --chardev stdio,id=stdio --monitor chardev=stdio \ --blockdev node-name=file0,driver=file,filename="$TEST_IMG" \ - --blockdev node-name=fmt0,driver=qcow2,file=file0 < Date: Thu, 1 Apr 2021 19:15:22 +0300 Subject: [PATCH 04/10] iotests: add test for removing persistent bitmap from backing file Just demonstrate one of x-blockdev-reopen usecases. We can't simply remove persistent bitmap from RO node (for example from backing file), as we need to remove it from the image too. So, we should reopen the node first. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210401161522.8001-1-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- .../tests/remove-bitmap-from-backing | 69 +++++++++++++++++++ .../tests/remove-bitmap-from-backing.out | 6 ++ 2 files changed, 75 insertions(+) create mode 100755 tests/qemu-iotests/tests/remove-bitmap-from-backing create mode 100644 tests/qemu-iotests/tests/remove-bitmap-from-backing.out diff --git a/tests/qemu-iotests/tests/remove-bitmap-from-backing b/tests/qemu-iotests/tests/remove-bitmap-from-backing new file mode 100755 index 0000000000..0ea4c36507 --- /dev/null +++ b/tests/qemu-iotests/tests/remove-bitmap-from-backing @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +# +# Test removing persistent bitmap from backing +# +# Copyright (c) 2021 Virtuozzo International GmbH. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import iotests +from iotests import log, qemu_img_create, qemu_img, qemu_img_pipe + +iotests.script_initialize(supported_fmts=['qcow2']) + +top, base = iotests.file_path('top', 'base') +size = '1M' + +assert qemu_img_create('-f', iotests.imgfmt, base, size) == 0 +assert qemu_img_create('-f', iotests.imgfmt, '-b', base, + '-F', iotests.imgfmt, top, size) == 0 + +assert qemu_img('bitmap', '--add', base, 'bitmap0') == 0 +# Just assert that our method of checking bitmaps in the image works. +assert 'bitmaps' in qemu_img_pipe('info', base) + +vm = iotests.VM().add_drive(top, 'backing.node-name=base') +vm.launch() + +log('Trying to remove persistent bitmap from r-o base node, should fail:') +vm.qmp_log('block-dirty-bitmap-remove', node='base', name='bitmap0') + +new_base_opts = { + 'node-name': 'base', + 'driver': 'qcow2', + 'file': { + 'driver': 'file', + 'filename': base + }, + 'read-only': False +} + +# Don't want to bother with filtering qmp_log for reopen command +result = vm.qmp('x-blockdev-reopen', **new_base_opts) +if result != {'return': {}}: + log('Failed to reopen: ' + str(result)) + +log('Remove persistent bitmap from base node reopened to RW:') +vm.qmp_log('block-dirty-bitmap-remove', node='base', name='bitmap0') + +new_base_opts['read-only'] = True +result = vm.qmp('x-blockdev-reopen', **new_base_opts) +if result != {'return': {}}: + log('Failed to reopen: ' + str(result)) + +vm.shutdown() + +if 'bitmaps' in qemu_img_pipe('info', base): + log('ERROR: Bitmap is still in the base image') diff --git a/tests/qemu-iotests/tests/remove-bitmap-from-backing.out b/tests/qemu-iotests/tests/remove-bitmap-from-backing.out new file mode 100644 index 0000000000..c28af82c75 --- /dev/null +++ b/tests/qemu-iotests/tests/remove-bitmap-from-backing.out @@ -0,0 +1,6 @@ +Trying to remove persistent bitmap from r-o base node, should fail: +{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "bitmap0", "node": "base"}} +{"error": {"class": "GenericError", "desc": "Bitmap 'bitmap0' is readonly and cannot be modified"}} +Remove persistent bitmap from base node reopened to RW: +{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "bitmap0", "node": "base"}} +{"return": {}} From f940b0ac6fa67deb9d0b671cf83070f0286c67e1 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 31 Mar 2021 14:28:15 +0200 Subject: [PATCH 05/10] iotests: Test mirror-top filter permissions Add a test accompanying commit 53431b9086b2832ca1aeff0c55e186e9ed79bd11 ("block/mirror: Fix mirror_top's permissions"). Signed-off-by: Max Reitz Message-Id: <20210331122815.51491-1-mreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/tests/mirror-top-perms | 121 ++++++++++++++++++ tests/qemu-iotests/tests/mirror-top-perms.out | 5 + 2 files changed, 126 insertions(+) create mode 100755 tests/qemu-iotests/tests/mirror-top-perms create mode 100644 tests/qemu-iotests/tests/mirror-top-perms.out diff --git a/tests/qemu-iotests/tests/mirror-top-perms b/tests/qemu-iotests/tests/mirror-top-perms new file mode 100755 index 0000000000..451a0666f8 --- /dev/null +++ b/tests/qemu-iotests/tests/mirror-top-perms @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# group: rw +# +# Test permissions taken by the mirror-top filter +# +# Copyright (C) 2021 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import os +import iotests +from iotests import qemu_img + +# Import qemu after iotests.py has amended sys.path +# pylint: disable=wrong-import-order +import qemu + + +image_size = 1 * 1024 * 1024 +source = os.path.join(iotests.test_dir, 'source.img') + + +class TestMirrorTopPerms(iotests.QMPTestCase): + def setUp(self): + assert qemu_img('create', '-f', iotests.imgfmt, source, + str(image_size)) == 0 + self.vm = iotests.VM() + self.vm.add_drive(source) + self.vm.add_blockdev(f'null-co,node-name=null,size={image_size}') + self.vm.launch() + + # Will be created by the test function itself + self.vm_b = None + + def tearDown(self): + try: + self.vm.shutdown() + except qemu.machine.AbnormalShutdown: + pass + + if self.vm_b is not None: + self.vm_b.shutdown() + + os.remove(source) + + def test_cancel(self): + """ + Before commit 53431b9086b28, mirror-top used to not take any + permissions but WRITE and share all permissions. Because it + is inserted between the source's original parents and the + source, there generally was no parent that would have taken or + unshared any permissions on the source, which means that an + external process could access the image unhindered by locks. + (Unless there was a parent above the protocol node that would + take its own locks, e.g. a format driver.) + This is bad enough, but if the mirror job is then cancelled, + the mirroring VM tries to take back the image, restores the + original permissions taken and unshared, and assumes this must + just work. But it will not, and so the VM aborts. + + Commit 53431b9086b28 made mirror keep the original permissions + and so no other process can "steal" the image. + + (Note that you cannot really do the same with the target image + and then completing the job, because the mirror job always + took/unshared the correct permissions on the target. For + example, it does not share READ_CONSISTENT, which makes it + difficult to let some other qemu process open the image.) + """ + + result = self.vm.qmp('blockdev-mirror', + job_id='mirror', + device='drive0', + target='null', + sync='full') + self.assert_qmp(result, 'return', {}) + + self.vm.event_wait('BLOCK_JOB_READY') + + # We want this to fail because the image cannot be locked. + # If it does not fail, continue still and see what happens. + self.vm_b = iotests.VM(path_suffix='b') + # Must use -blockdev -device so we can use share-rw. + # (And we need share-rw=on because mirror-top was always + # forced to take the WRITE permission so it can write to the + # source image.) + self.vm_b.add_blockdev(f'file,node-name=drive0,filename={source}') + self.vm_b.add_device('virtio-blk,drive=drive0,share-rw=on') + try: + self.vm_b.launch() + print('ERROR: VM B launched successfully, this should not have ' + 'happened') + except qemu.qmp.QMPConnectError: + assert 'Is another process using the image' in self.vm_b.get_log() + + result = self.vm.qmp('block-job-cancel', + device='mirror') + self.assert_qmp(result, 'return', {}) + + self.vm.event_wait('BLOCK_JOB_COMPLETED') + + +if __name__ == '__main__': + # No metadata format driver supported, because they would for + # example always unshare the WRITE permission. The raw driver + # just passes through the permissions from the guest device, and + # those are the permissions that we want to test. + iotests.main(supported_fmts=['raw'], + supported_protocols=['file']) diff --git a/tests/qemu-iotests/tests/mirror-top-perms.out b/tests/qemu-iotests/tests/mirror-top-perms.out new file mode 100644 index 0000000000..ae1213e6f8 --- /dev/null +++ b/tests/qemu-iotests/tests/mirror-top-perms.out @@ -0,0 +1,5 @@ +. +---------------------------------------------------------------------- +Ran 1 tests + +OK From da64789d3a16b2c5b5f1be9c75b00c2b8ae393a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 7 Apr 2021 15:37:42 +0200 Subject: [PATCH 06/10] hw/block/fdc: Fix 'fallback' property on sysbus floppy disk controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting the 'fallback' property corrupts the QOM instance state (FDCtrlSysBus) because it accesses an incorrect offset (it uses the offset of the FDCtrlISABus state). Cc: qemu-stable@nongnu.org Fixes: a73275dd6fc ("fdc: Add fallback option") Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20210407133742.1680424-1-f4bug@amsat.org> Reviewed-by: Markus Armbruster Signed-off-by: Kevin Wolf --- hw/block/fdc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 82afda7f3a..a825c2acba 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -2893,7 +2893,7 @@ static Property sysbus_fdc_properties[] = { DEFINE_PROP_SIGNED("fdtypeB", FDCtrlSysBus, state.qdev_for_drives[1].type, FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, FloppyDriveType), - DEFINE_PROP_SIGNED("fallback", FDCtrlISABus, state.fallback, + DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback, FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type, FloppyDriveType), DEFINE_PROP_END_OF_LIST(), @@ -2918,7 +2918,7 @@ static Property sun4m_fdc_properties[] = { DEFINE_PROP_SIGNED("fdtype", FDCtrlSysBus, state.qdev_for_drives[0].type, FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, FloppyDriveType), - DEFINE_PROP_SIGNED("fallback", FDCtrlISABus, state.fallback, + DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback, FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type, FloppyDriveType), DEFINE_PROP_END_OF_LIST(), From c41f5b96ee73925c165036d59c4efa761826e800 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 9 Apr 2021 14:04:18 +0200 Subject: [PATCH 07/10] mirror: Move open_backing_file to exit_common This is a graph change and therefore should be done in job-finalize (which is what invokes mirror_exit_common()). Signed-off-by: Max Reitz Message-Id: <20210409120422.144040-2-mreitz@redhat.com> Signed-off-by: Kevin Wolf --- block/mirror.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index d7e54c0ff7..f1f936bf1a 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -689,6 +689,14 @@ static int mirror_exit_common(Job *job) ret = -EPERM; } } + } else if (!abort && s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) { + assert(!bdrv_backing_chain_next(target_bs)); + ret = bdrv_open_backing_file(bdrv_skip_filters(target_bs), NULL, + "backing", &local_err); + if (ret < 0) { + error_report_err(local_err); + local_err = NULL; + } } if (s->to_replace) { @@ -1107,9 +1115,6 @@ immediate_exit: static void mirror_complete(Job *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); - BlockDriverState *target; - - target = blk_bs(s->target); if (!s->synced) { error_setg(errp, "The active block job '%s' cannot be completed", @@ -1117,17 +1122,6 @@ static void mirror_complete(Job *job, Error **errp) return; } - if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) { - int ret; - - assert(!bdrv_backing_chain_next(target)); - ret = bdrv_open_backing_file(bdrv_skip_filters(target), NULL, - "backing", errp); - if (ret < 0) { - return; - } - } - /* block all operations on to_replace bs */ if (s->replaces) { AioContext *replace_aio_context; From 00769414cd1044b823b65e66586e93bb79494441 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 9 Apr 2021 14:04:19 +0200 Subject: [PATCH 08/10] mirror: Do not enter a paused job on completion Currently, it is impossible to complete jobs on standby (i.e. paused ready jobs), but actually the only thing in mirror_complete() that does not work quite well with a paused job is the job_enter() at the end. If we make it conditional, this function works just fine even if the mirror job is paused. So technically this is a no-op, but obviously the intention is to accept block-job-complete even for jobs on standby, which we need this patch for first. Signed-off-by: Max Reitz Message-Id: <20210409120422.144040-3-mreitz@redhat.com> Signed-off-by: Kevin Wolf --- block/mirror.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index f1f936bf1a..5a71bd8bbc 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1148,7 +1148,11 @@ static void mirror_complete(Job *job, Error **errp) } s->should_complete = true; - job_enter(job); + + /* If the job is paused, it will be re-entered when it is resumed */ + if (!job->paused) { + job_enter(job); + } } static void coroutine_fn mirror_pause(Job *job) From 53ddb9c892f048bd031568178da52e4964d7d30a Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 9 Apr 2021 14:04:20 +0200 Subject: [PATCH 09/10] job: Allow complete for jobs on standby The only job that implements .complete is the mirror job, and it can handle completion requests just fine while the job is paused. Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1945635 Signed-off-by: Max Reitz Message-Id: <20210409120422.144040-4-mreitz@redhat.com> Signed-off-by: Kevin Wolf --- job.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job.c b/job.c index 289edee143..4aff13d95a 100644 --- a/job.c +++ b/job.c @@ -56,7 +56,7 @@ bool JobVerbTable[JOB_VERB__MAX][JOB_STATUS__MAX] = { [JOB_VERB_PAUSE] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, [JOB_VERB_RESUME] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, [JOB_VERB_SET_SPEED] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, - [JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, + [JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0}, [JOB_VERB_FINALIZE] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, [JOB_VERB_DISMISS] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0}, }; @@ -991,7 +991,7 @@ void job_complete(Job *job, Error **errp) if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) { return; } - if (job->pause_count || job_is_cancelled(job) || !job->driver->complete) { + if (job_is_cancelled(job) || !job->driver->complete) { error_setg(errp, "The active block job '%s' cannot be completed", job->id); return; From c2c731a4d35062295cd3260e66b3754588a2fad4 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 9 Apr 2021 14:04:21 +0200 Subject: [PATCH 10/10] test-blockjob: Test job_wait_unpaused() Create a job that remains on STANDBY after a drained section, and see that invoking job_wait_unpaused() will get it unstuck. Signed-off-by: Max Reitz Message-Id: <20210409120422.144040-5-mreitz@redhat.com> Signed-off-by: Kevin Wolf --- tests/unit/test-blockjob.c | 121 +++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/tests/unit/test-blockjob.c b/tests/unit/test-blockjob.c index 7519847912..dcacfa6c7c 100644 --- a/tests/unit/test-blockjob.c +++ b/tests/unit/test-blockjob.c @@ -16,6 +16,7 @@ #include "block/blockjob_int.h" #include "sysemu/block-backend.h" #include "qapi/qmp/qdict.h" +#include "iothread.h" static const BlockJobDriver test_block_job_driver = { .job_driver = { @@ -375,6 +376,125 @@ static void test_cancel_concluded(void) cancel_common(s); } +/* (See test_yielding_driver for the job description) */ +typedef struct YieldingJob { + BlockJob common; + bool should_complete; +} YieldingJob; + +static void yielding_job_complete(Job *job, Error **errp) +{ + YieldingJob *s = container_of(job, YieldingJob, common.job); + s->should_complete = true; + job_enter(job); +} + +static int coroutine_fn yielding_job_run(Job *job, Error **errp) +{ + YieldingJob *s = container_of(job, YieldingJob, common.job); + + job_transition_to_ready(job); + + while (!s->should_complete) { + job_yield(job); + } + + return 0; +} + +/* + * This job transitions immediately to the READY state, and then + * yields until it is to complete. + */ +static const BlockJobDriver test_yielding_driver = { + .job_driver = { + .instance_size = sizeof(YieldingJob), + .free = block_job_free, + .user_resume = block_job_user_resume, + .run = yielding_job_run, + .complete = yielding_job_complete, + }, +}; + +/* + * Test that job_complete() works even on jobs that are in a paused + * state (i.e., STANDBY). + * + * To do this, run YieldingJob in an IO thread, get it into the READY + * state, then have a drained section. Before ending the section, + * acquire the context so the job will not be entered and will thus + * remain on STANDBY. + * + * job_complete() should still work without error. + * + * Note that on the QMP interface, it is impossible to lock an IO + * thread before a drained section ends. In practice, the + * bdrv_drain_all_end() and the aio_context_acquire() will be + * reversed. However, that makes for worse reproducibility here: + * Sometimes, the job would no longer be in STANDBY then but already + * be started. We cannot prevent that, because the IO thread runs + * concurrently. We can only prevent it by taking the lock before + * ending the drained section, so we do that. + * + * (You can reverse the order of operations and most of the time the + * test will pass, but sometimes the assert(status == STANDBY) will + * fail.) + */ +static void test_complete_in_standby(void) +{ + BlockBackend *blk; + IOThread *iothread; + AioContext *ctx; + Job *job; + BlockJob *bjob; + + /* Create a test drive, move it to an IO thread */ + blk = create_blk(NULL); + iothread = iothread_new(); + + ctx = iothread_get_aio_context(iothread); + blk_set_aio_context(blk, ctx, &error_abort); + + /* Create our test job */ + bjob = mk_job(blk, "job", &test_yielding_driver, true, + JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS); + job = &bjob->job; + assert(job->status == JOB_STATUS_CREATED); + + /* Wait for the job to become READY */ + job_start(job); + aio_context_acquire(ctx); + AIO_WAIT_WHILE(ctx, job->status != JOB_STATUS_READY); + aio_context_release(ctx); + + /* Begin the drained section, pausing the job */ + bdrv_drain_all_begin(); + assert(job->status == JOB_STATUS_STANDBY); + /* Lock the IO thread to prevent the job from being run */ + aio_context_acquire(ctx); + /* This will schedule the job to resume it */ + bdrv_drain_all_end(); + + /* But the job cannot run, so it will remain on standby */ + assert(job->status == JOB_STATUS_STANDBY); + + /* Even though the job is on standby, this should work */ + job_complete(job, &error_abort); + + /* The test is done now, clean up. */ + job_finish_sync(job, NULL, &error_abort); + assert(job->status == JOB_STATUS_PENDING); + + job_finalize(job, &error_abort); + assert(job->status == JOB_STATUS_CONCLUDED); + + job_dismiss(&job, &error_abort); + + destroy_blk(blk); + aio_context_release(ctx); + iothread_join(iothread); +} + int main(int argc, char **argv) { qemu_init_main_loop(&error_abort); @@ -389,5 +509,6 @@ int main(int argc, char **argv) g_test_add_func("/blockjob/cancel/standby", test_cancel_standby); g_test_add_func("/blockjob/cancel/pending", test_cancel_pending); g_test_add_func("/blockjob/cancel/concluded", test_cancel_concluded); + g_test_add_func("/blockjob/complete_in_standby", test_complete_in_standby); return g_test_run(); }