From b857431d2abe3945b672b41f33690e9943a8752a Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 25 Sep 2018 13:05:01 +0800 Subject: [PATCH 01/23] file-posix: Include filename in locking error message Image locking errors happening at device initialization time doesn't say which file cannot be locked, for instance, -device scsi-disk,drive=drive-1: Failed to get shared "write" lock Is another process using the image? could refer to either the overlay image or its backing image. Hoist the error_append_hint to the caller of raw_check_lock_bytes where file name is known, and include it in the error hint. Signed-off-by: Fam Zheng Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/file-posix.c | 10 +++-- tests/qemu-iotests/153.out | 76 +++++++++++++++++++------------------- tests/qemu-iotests/182.out | 2 +- 3 files changed, 45 insertions(+), 43 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index fe83cbf0eb..327f39ca45 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -741,8 +741,6 @@ static int raw_check_lock_bytes(int fd, uint64_t perm, uint64_t shared_perm, "Failed to get \"%s\" lock", perm_name); g_free(perm_name); - error_append_hint(errp, - "Is another process using the image?\n"); return ret; } } @@ -758,8 +756,6 @@ static int raw_check_lock_bytes(int fd, uint64_t perm, uint64_t shared_perm, "Failed to get shared \"%s\" lock", perm_name); g_free(perm_name); - error_append_hint(errp, - "Is another process using the image?\n"); return ret; } } @@ -796,6 +792,9 @@ static int raw_handle_perm_lock(BlockDriverState *bs, if (!ret) { return 0; } + error_append_hint(errp, + "Is another process using the image [%s]?\n", + bs->filename); } op = RAW_PL_ABORT; /* fall through to unlock bytes. */ @@ -2217,6 +2216,9 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) /* Step two: Check that nobody else has taken conflicting locks */ result = raw_check_lock_bytes(fd, perm, shared, errp); if (result < 0) { + error_append_hint(errp, + "Is another process using the image [%s]?\n", + file_opts->filename); goto out_unlock; } diff --git a/tests/qemu-iotests/153.out b/tests/qemu-iotests/153.out index 93eaf10486..884254868c 100644 --- a/tests/qemu-iotests/153.out +++ b/tests/qemu-iotests/153.out @@ -12,11 +12,11 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t == Launching another QEMU, opts: '' == QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Launching another QEMU, opts: 'read-only=on' == QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,read-only=on: Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Launching another QEMU, opts: 'read-only=on,force-share=on' == @@ -24,77 +24,77 @@ Is another process using the image? _qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? no file open, try 'help open' _qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? no file open, try 'help open' _qemu_img_wrapper info TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper check TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper map TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper amend -o TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper commit TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper resize TEST_DIR/t.qcow2 32M qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper create -f qcow2 TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? file format: IMGFMT == Running utility commands -U == @@ -132,7 +132,7 @@ Try 'qemu-img --help' for more information _qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2 @@ -157,7 +157,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t == Launching another QEMU, opts: '' == QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Launching another QEMU, opts: 'read-only=on' == @@ -167,13 +167,13 @@ Is another process using the image? _qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 _qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? no file open, try 'help open' _qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 @@ -188,19 +188,19 @@ _qemu_img_wrapper map TEST_DIR/t.qcow2 _qemu_img_wrapper amend -o TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper commit TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper resize TEST_DIR/t.qcow2 32M qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2 @@ -212,11 +212,11 @@ _qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2 _qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper create -f qcow2 TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? file format: IMGFMT == Running utility commands -U == @@ -254,7 +254,7 @@ Try 'qemu-img --help' for more information _qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2 @@ -372,17 +372,17 @@ Round done == Two devices with the same image (read-only=off - read-only=off) == QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2,read-only=off: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Two devices with the same image (read-only=off - read-only=on) == QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2,read-only=on: Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Two devices with the same image (read-only=off - read-only=on,force-share=on) == == Two devices with the same image (read-only=on - read-only=off) == QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2,read-only=off: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Two devices with the same image (read-only=on - read-only=on) == @@ -403,13 +403,13 @@ Formatting 'TEST_DIR/t.IMGFMT.c', fmt=IMGFMT size=33554432 backing_file=TEST_DIR == Backing image also as an active device == QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Backing image also as an active device (ro) == == Symbolic link == QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Active commit to intermediate layer should work when base in use == {"return": {}} @@ -420,7 +420,7 @@ Adding drive _qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? Creating overlay with qemu-img when the guest is running should be allowed _qemu_img_wrapper create -f qcow2 -b TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.overlay @@ -433,7 +433,7 @@ _qemu_img_wrapper info TEST_DIR/t.qcow2 _qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? Closing the other _qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 diff --git a/tests/qemu-iotests/182.out b/tests/qemu-iotests/182.out index 23a4dbf809..f1463c8862 100644 --- a/tests/qemu-iotests/182.out +++ b/tests/qemu-iotests/182.out @@ -4,5 +4,5 @@ Starting QEMU Starting a second QEMU using the same image should fail QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,id=drive0,file.locking=on: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? *** done From a8003ec40d0755228b646408d653bf0969d6ac33 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Thu, 6 Sep 2018 12:37:01 +0300 Subject: [PATCH 02/23] qemu-io: Fix writethrough check in reopen "qemu-io reopen" doesn't allow changing the writethrough setting of the cache, but the check is wrong, causing an error even on a simple reopen with the default parameters: $ qemu-img create -f qcow2 hd.qcow2 1M $ qemu-system-x86_64 -monitor stdio -drive if=virtio,file=hd.qcow2 (qemu) qemu-io virtio0 reopen Cannot change cache.writeback: Device attached Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- qemu-io-cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 5bf5f28178..db0b3ee5ef 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -2025,7 +2025,7 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) return -EINVAL; } - if (writethrough != blk_enable_write_cache(blk) && + if (!writethrough != blk_enable_write_cache(blk) && blk_get_attached_dev(blk)) { error_report("Cannot change cache.writeback: Device attached"); From 589f20dccd94f6fab3cd3d2e0d21aae0c7210d12 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Thu, 6 Sep 2018 12:37:02 +0300 Subject: [PATCH 03/23] file-posix: x-check-cache-dropped should default to false on reopen The default value of x-check-cache-dropped is false. There's no reason to use the previous value as a default in raw_reopen_prepare() because bdrv_reopen_queue_child() already takes care of putting the old options in the BDRVReopenState.options QDict. If x-check-cache-dropped was previously set but is now missing from the reopen QDict then it should be reset to false. Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block/file-posix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/file-posix.c b/block/file-posix.c index 327f39ca45..bc5e54560a 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -850,7 +850,7 @@ static int raw_reopen_prepare(BDRVReopenState *state, } rs->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped", - s->check_cache_dropped); + false); if (s->type == FTYPE_CD) { rs->open_flags |= O_NONBLOCK; From 50196d7a7c73133a6a6bf484571fb8f5daf7f104 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Thu, 6 Sep 2018 12:37:03 +0300 Subject: [PATCH 04/23] block: Remove child references from bs->{options,explicit_options} Block drivers allow opening their children using a reference to an existing BlockDriverState. These references remain stored in the 'options' and 'explicit_options' QDicts, but we don't need to keep them once everything is open. What is more important, these values can become wrong if the children change: $ qemu-img create -f qcow2 hd0.qcow2 10M $ qemu-img create -f qcow2 hd1.qcow2 10M $ qemu-img create -f qcow2 hd2.qcow2 10M $ $QEMU -drive if=none,file=hd0.qcow2,node-name=hd0 \ -drive if=none,file=hd1.qcow2,node-name=hd1,backing=hd0 \ -drive file=hd2.qcow2,node-name=hd2,backing=hd1 After this hd2 has hd1 as its backing file. Now let's remove it using block_stream: (qemu) block_stream hd2 0 hd0.qcow2 Now hd0 is the backing file of hd2, but hd2's options QDicts still contain backing=hd1. Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index c298ca6a19..db71ea5b9a 100644 --- a/block.c +++ b/block.c @@ -2763,12 +2763,15 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, } } - /* Remove all children options from bs->options and bs->explicit_options */ + /* Remove all children options and references + * from bs->options and bs->explicit_options */ QLIST_FOREACH(child, &bs->children, next) { char *child_key_dot; child_key_dot = g_strdup_printf("%s.", child->name); qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot); qdict_extract_subqdict(bs->options, NULL, child_key_dot); + qdict_del(bs->explicit_options, child->name); + qdict_del(bs->options, child->name); g_free(child_key_dot); } @@ -3290,6 +3293,7 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) { BlockDriver *drv; BlockDriverState *bs; + BdrvChild *child; bool old_can_write, new_can_write; assert(reopen_state != NULL); @@ -3314,6 +3318,13 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) bs->open_flags = reopen_state->flags; bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); + /* Remove child references from bs->options and bs->explicit_options. + * Child options were already removed in bdrv_reopen_queue_child() */ + QLIST_FOREACH(child, &bs->children, next) { + qdict_del(bs->explicit_options, child->name); + qdict_del(bs->options, child->name); + } + bdrv_refresh_limits(bs, NULL); bdrv_set_perm(reopen_state->bs, reopen_state->perm, From a600aaddc3894618b8a0a20a084dd0515c26f4d5 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Thu, 6 Sep 2018 12:37:04 +0300 Subject: [PATCH 05/23] block: Don't look for child references in append_open_options() In the previous patch we removed child references from bs->options, so there's no need to look for them here anymore. Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/block.c b/block.c index db71ea5b9a..98aca56785 100644 --- a/block.c +++ b/block.c @@ -5150,23 +5150,12 @@ static bool append_open_options(QDict *d, BlockDriverState *bs) { const QDictEntry *entry; QemuOptDesc *desc; - BdrvChild *child; bool found_any = false; for (entry = qdict_first(bs->options); entry; entry = qdict_next(bs->options, entry)) { - /* Exclude node-name references to children */ - QLIST_FOREACH(child, &bs->children, next) { - if (!strcmp(entry->key, child->name)) { - break; - } - } - if (child) { - continue; - } - - /* And exclude all non-driver-specific options */ + /* Exclude all non-driver-specific options */ for (desc = bdrv_runtime_opts.desc; desc->name; desc++) { if (!strcmp(qdict_entry_key(entry), desc->name)) { break; From db905283b8084f168d239b2d7bfbf3e2f01ed50f Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Thu, 6 Sep 2018 12:37:05 +0300 Subject: [PATCH 06/23] block: Allow child references on reopen In the previous patches we removed all child references from bs->{options,explicit_options} because keeping them is useless and wrong. Because of this, any attempt to reopen a BlockDriverState using a child reference as one of its options would result in a failure, because bdrv_reopen_prepare() would detect that there's a new option (the child reference) that wasn't present in bs->options. But passing child references on reopen can be useful. It's a way to specify a BDS's child without having to pass recursively all of the child's options, and if the reference points to a different BDS then this can allow us to replace the child. However, replacing the child is something that needs to be implemented case by case and only when it makes sense. For now, this patch allows passing a child reference as long as it points to the current child of the BlockDriverState. It's also important to remember that, as a consequence of the previous patches, this child reference will be removed from bs->{options,explicit_options} after the reopening has been completed. Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/block.c b/block.c index 98aca56785..ff1aded4b8 100644 --- a/block.c +++ b/block.c @@ -3242,6 +3242,24 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, QObject *new = entry->value; QObject *old = qdict_get(reopen_state->bs->options, entry->key); + /* Allow child references (child_name=node_name) as long as they + * point to the current child (i.e. everything stays the same). */ + if (qobject_type(new) == QTYPE_QSTRING) { + BdrvChild *child; + QLIST_FOREACH(child, &reopen_state->bs->children, next) { + if (!strcmp(child->name, entry->key)) { + break; + } + } + + if (child) { + const char *str = qobject_get_try_str(new); + if (!strcmp(child->bs->node_name, str)) { + continue; /* Found child with this name, skip option */ + } + } + } + /* * TODO: When using -drive to specify blockdev options, all values * will be strings; however, when using -blockdev, blockdev-add or From 57f9db9a947f4d32bbd56ed9b1e0989f6c306dfb Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Thu, 6 Sep 2018 12:37:06 +0300 Subject: [PATCH 07/23] block: Forbid trying to change unsupported options during reopen The bdrv_reopen_prepare() function checks all options passed to each BlockDriverState (in the reopen_state->options QDict) and makes all necessary preparations to apply the option changes requested by the user. Options are removed from the QDict as they are processed, so at the end of bdrv_reopen_prepare() only the options that can't be changed are left. Then a loop goes over all remaining options and verifies that the old and new values are identical, returning an error if they're not. The problem is that at the moment there are options that are removed from the QDict although they can't be changed. The consequence of this is any modification to any of those options is silently ignored: (qemu) qemu-io virtio0 "reopen -o discard=on" This happens when all options from bdrv_runtime_opts are removed from the QDict but then only a few of them are processed. Since it's especially important that "node-name" and "driver" are not changed, the code puts them back into the QDict so they are checked at the end of the function. Instead of putting only those two options back into the QDict, this patch puts all unprocessed options using qemu_opts_to_qdict(). update_flags_from_options() also needs to be modified to prevent BDRV_OPT_CACHE_NO_FLUSH, BDRV_OPT_CACHE_DIRECT and BDRV_OPT_READ_ONLY from going back to the QDict. Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/block.c b/block.c index ff1aded4b8..e36ae3e8d1 100644 --- a/block.c +++ b/block.c @@ -1094,19 +1094,19 @@ static void update_flags_from_options(int *flags, QemuOpts *opts) *flags &= ~BDRV_O_CACHE_MASK; assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH)); - if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { + if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { *flags |= BDRV_O_NO_FLUSH; } assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT)); - if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) { + if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) { *flags |= BDRV_O_NOCACHE; } *flags &= ~BDRV_O_RDWR; assert(qemu_opt_find(opts, BDRV_OPT_READ_ONLY)); - if (!qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false)) { + if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) { *flags |= BDRV_O_RDWR; } @@ -3156,7 +3156,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, BlockDriver *drv; QemuOpts *opts; QDict *orig_reopen_opts; - const char *value; bool read_only; assert(reopen_state != NULL); @@ -3179,17 +3178,10 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, update_flags_from_options(&reopen_state->flags, opts); - /* node-name and driver must be unchanged. Put them back into the QDict, so - * that they are checked at the end of this function. */ - value = qemu_opt_get(opts, "node-name"); - if (value) { - qdict_put_str(reopen_state->options, "node-name", value); - } - - value = qemu_opt_get(opts, "driver"); - if (value) { - qdict_put_str(reopen_state->options, "driver", value); - } + /* All other options (including node-name and driver) must be unchanged. + * Put them back into the QDict, so that they are checked at the end + * of this function. */ + qemu_opts_to_qdict(opts, reopen_state->options); /* If we are to stay read-only, do not allow permission change * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is From 8d3245750be30f3191f710f52fbe53ca10e78bfc Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Thu, 6 Sep 2018 12:37:07 +0300 Subject: [PATCH 08/23] file-posix: Forbid trying to change unsupported options during reopen The file-posix code is used for the "file", "host_device" and "host_cdrom" drivers, and it allows reopening images. However the only option that is actually processed is "x-check-cache-dropped", and changes in all other options (e.g. "filename") are silently ignored: (qemu) qemu-io virtio0 "reopen -o file.filename=no-such-file" While we could allow changing some of the other options, let's keep things as they are for now but return an error if the user tries to change any of them. Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block/file-posix.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index bc5e54560a..2da3a76355 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -849,8 +849,13 @@ static int raw_reopen_prepare(BDRVReopenState *state, goto out; } - rs->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped", - false); + rs->check_cache_dropped = + qemu_opt_get_bool_del(opts, "x-check-cache-dropped", false); + + /* This driver's reopen function doesn't currently allow changing + * other options, so let's put them back in the original QDict and + * bdrv_reopen_prepare() will detect changes and complain. */ + qemu_opts_to_qdict(opts, state->options); if (s->type == FTYPE_CD) { rs->open_flags |= O_NONBLOCK; From 593b30719713cfad98b869693095893f8edf76c1 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Thu, 6 Sep 2018 12:37:08 +0300 Subject: [PATCH 09/23] block: Allow changing 'discard' on reopen 'discard' is one of the basic BlockdevOptions available for all drivers, but it's not handled by bdrv_reopen_prepare() so any attempt to change it results in an error: (qemu) qemu-io virtio0 "reopen -o discard=on" Cannot change the option 'discard' Since there's no reason why we shouldn't allow changing it and the implementation is simple let's just do it. Signed-off-by: Alberto Garcia Signed-off-by: Kevin Wolf --- block.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/block.c b/block.c index e36ae3e8d1..7fb267bd1f 100644 --- a/block.c +++ b/block.c @@ -3156,6 +3156,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, BlockDriver *drv; QemuOpts *opts; QDict *orig_reopen_opts; + char *discard = NULL; bool read_only; assert(reopen_state != NULL); @@ -3178,6 +3179,15 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, update_flags_from_options(&reopen_state->flags, opts); + discard = qemu_opt_get_del(opts, "discard"); + if (discard != NULL) { + if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) { + error_setg(errp, "Invalid discard option"); + ret = -EINVAL; + goto error; + } + } + /* All other options (including node-name and driver) must be unchanged. * Put them back into the QDict, so that they are checked at the end * of this function. */ @@ -3291,6 +3301,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, error: qemu_opts_del(opts); qobject_unref(orig_reopen_opts); + g_free(discard); return ret; } From 543770bd2e8fbaac286a947dee91675adb3215c7 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Thu, 6 Sep 2018 12:37:09 +0300 Subject: [PATCH 10/23] block: Allow changing 'detect-zeroes' on reopen 'detect-zeroes' is one of the basic BlockdevOptions available for all drivers, but it's not handled by bdrv_reopen_prepare(), so any attempt to change it results in an error: (qemu) qemu-io virtio0 "reopen -o detect-zeroes=on" Cannot change the option 'detect-zeroes' Since there's no reason why we shouldn't allow changing it and the implementation is simple let's just do it. Signed-off-by: Alberto Garcia Signed-off-by: Kevin Wolf --- block.c | 64 +++++++++++++++++++++++++++---------------- include/block/block.h | 1 + 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/block.c b/block.c index 7fb267bd1f..7710b399a3 100644 --- a/block.c +++ b/block.c @@ -764,6 +764,31 @@ static void bdrv_join_options(BlockDriverState *bs, QDict *options, } } +static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, + int open_flags, + Error **errp) +{ + Error *local_err = NULL; + char *value = qemu_opt_get_del(opts, "detect-zeroes"); + BlockdevDetectZeroesOptions detect_zeroes = + qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, + BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); + g_free(value); + if (local_err) { + error_propagate(errp, local_err); + return detect_zeroes; + } + + if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && + !(open_flags & BDRV_O_UNMAP)) + { + error_setg(errp, "setting detect-zeroes to unmap is not allowed " + "without setting discard operation to unmap"); + } + + return detect_zeroes; +} + /** * Set open flags for a given discard mode * @@ -1328,7 +1353,6 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, const char *driver_name = NULL; const char *node_name = NULL; const char *discard; - const char *detect_zeroes; QemuOpts *opts; BlockDriver *drv; Error *local_err = NULL; @@ -1417,29 +1441,12 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, } } - detect_zeroes = qemu_opt_get(opts, "detect-zeroes"); - if (detect_zeroes) { - BlockdevDetectZeroesOptions value = - qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, - detect_zeroes, - BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, - &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto fail_opts; - } - - if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && - !(bs->open_flags & BDRV_O_UNMAP)) - { - error_setg(errp, "setting detect-zeroes to unmap is not allowed " - "without setting discard operation to unmap"); - ret = -EINVAL; - goto fail_opts; - } - - bs->detect_zeroes = value; + bs->detect_zeroes = + bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail_opts; } if (filename != NULL) { @@ -3188,6 +3195,14 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, } } + reopen_state->detect_zeroes = + bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto error; + } + /* All other options (including node-name and driver) must be unchanged. * Put them back into the QDict, so that they are checked at the end * of this function. */ @@ -3338,6 +3353,7 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) bs->options = reopen_state->options; bs->open_flags = reopen_state->flags; bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); + bs->detect_zeroes = reopen_state->detect_zeroes; /* Remove child references from bs->options and bs->explicit_options. * Child options were already removed in bdrv_reopen_queue_child() */ diff --git a/include/block/block.h b/include/block/block.h index 4edc1e8afa..b189cf422e 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -184,6 +184,7 @@ typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; typedef struct BDRVReopenState { BlockDriverState *bs; int flags; + BlockdevDetectZeroesOptions detect_zeroes; uint64_t perm, shared_perm; QDict *options; QDict *explicit_options; From 40fb215d483ce510e211b843352288894eb13285 Mon Sep 17 00:00:00 2001 From: Leonid Bloch Date: Wed, 26 Sep 2018 19:04:39 +0300 Subject: [PATCH 11/23] qcow2: Options' documentation fixes Signed-off-by: Leonid Bloch Reviewed-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- docs/qcow2-cache.txt | 21 ++++++++++++++------- qemu-options.hx | 9 ++++++--- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt index 8a09a5cc5f..7e28b41bd3 100644 --- a/docs/qcow2-cache.txt +++ b/docs/qcow2-cache.txt @@ -79,14 +79,14 @@ Choosing the right cache sizes In order to choose the cache sizes we need to know how they relate to the amount of allocated space. -The amount of virtual disk that can be mapped by the L2 and refcount +The part of the virtual disk that can be mapped by the L2 and refcount caches (in bytes) is: disk_size = l2_cache_size * cluster_size / 8 disk_size = refcount_cache_size * cluster_size * 8 / refcount_bits With the default values for cluster_size (64KB) and refcount_bits -(16), that is +(16), this becomes: disk_size = l2_cache_size * 8192 disk_size = refcount_cache_size * 32768 @@ -97,12 +97,16 @@ need: l2_cache_size = disk_size_GB * 131072 refcount_cache_size = disk_size_GB * 32768 -QEMU has a default L2 cache of 1MB (1048576 bytes) and a refcount -cache of 256KB (262144 bytes), so using the formulas we've just seen -we have +For example, 1MB of L2 cache is needed to cover every 8 GB of the virtual +image size (given that the default cluster size is used): - 1048576 / 131072 = 8 GB of virtual disk covered by that cache - 262144 / 32768 = 8 GB + 8 GB / 8192 = 1 MB + +The refcount cache is 4 times the cluster size by default. With the default +cluster size of 64 KB, it is 256 KB (262144 bytes). This is sufficient for +8 GB of image size: + + 262144 * 32768 = 8 GB How to configure the cache sizes @@ -130,6 +134,9 @@ There are a few things that need to be taken into account: memory as possible to the L2 cache before increasing the refcount cache size. + - At most two of "l2-cache-size", "refcount-cache-size", and "cache-size" + can be set simultaneously. + Unlike L2 tables, refcount blocks are not used during normal I/O but only during allocations and internal snapshots. In most cases they are accessed sequentially (even during random guest I/O) so increasing the diff --git a/qemu-options.hx b/qemu-options.hx index a642ad297f..2db6247eff 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -732,15 +732,18 @@ image file) @item cache-size The maximum total size of the L2 table and refcount block caches in bytes -(default: 1048576 bytes or 8 clusters, whichever is larger) +(default: the sum of l2-cache-size and refcount-cache-size) @item l2-cache-size The maximum size of the L2 table cache in bytes -(default: 4/5 of the total cache size) +(default: if cache-size is not defined - 1048576 bytes or 8 clusters, whichever +is larger; otherwise, as large as possible or needed within the cache-size, +while permitting the requested or the minimal refcount cache size) @item refcount-cache-size The maximum size of the refcount block cache in bytes -(default: 1/5 of the total cache size) +(default: 4 times the cluster size; or if cache-size is specified, the part of +it which is not used for the L2 cache) @item cache-clean-interval Clean unused entries in the L2 and refcount caches. The interval is in seconds. From 540b8492618ebbe98e7462bd7d31361b8cb10a05 Mon Sep 17 00:00:00 2001 From: Leonid Bloch Date: Wed, 26 Sep 2018 19:04:40 +0300 Subject: [PATCH 12/23] include: Add a lookup table of sizes Adding a lookup table for the powers of two, with the appropriate size prefixes. This is needed when a size has to be stringified, in which case something like '(1 * KiB)' would become a literal '(1 * (1L << 10))' string. Powers of two are used very often for sizes, so such a table will also make it easier and more intuitive to write them. This table is generatred using the following AWK script: BEGIN { suffix="KMGTPE"; for(i=10; i<64; i++) { val=2**i; s=substr(suffix, int(i/10), 1); n=2**(i%10); pad=21-int(log(n)/log(10)); printf("#define S_%d%siB %*d\n", n, s, pad, val); } } Signed-off-by: Leonid Bloch Reviewed-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- include/qemu/units.h | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/include/qemu/units.h b/include/qemu/units.h index 692db3fbb2..68a7758650 100644 --- a/include/qemu/units.h +++ b/include/qemu/units.h @@ -17,4 +17,59 @@ #define PiB (INT64_C(1) << 50) #define EiB (INT64_C(1) << 60) +#define S_1KiB 1024 +#define S_2KiB 2048 +#define S_4KiB 4096 +#define S_8KiB 8192 +#define S_16KiB 16384 +#define S_32KiB 32768 +#define S_64KiB 65536 +#define S_128KiB 131072 +#define S_256KiB 262144 +#define S_512KiB 524288 +#define S_1MiB 1048576 +#define S_2MiB 2097152 +#define S_4MiB 4194304 +#define S_8MiB 8388608 +#define S_16MiB 16777216 +#define S_32MiB 33554432 +#define S_64MiB 67108864 +#define S_128MiB 134217728 +#define S_256MiB 268435456 +#define S_512MiB 536870912 +#define S_1GiB 1073741824 +#define S_2GiB 2147483648 +#define S_4GiB 4294967296 +#define S_8GiB 8589934592 +#define S_16GiB 17179869184 +#define S_32GiB 34359738368 +#define S_64GiB 68719476736 +#define S_128GiB 137438953472 +#define S_256GiB 274877906944 +#define S_512GiB 549755813888 +#define S_1TiB 1099511627776 +#define S_2TiB 2199023255552 +#define S_4TiB 4398046511104 +#define S_8TiB 8796093022208 +#define S_16TiB 17592186044416 +#define S_32TiB 35184372088832 +#define S_64TiB 70368744177664 +#define S_128TiB 140737488355328 +#define S_256TiB 281474976710656 +#define S_512TiB 562949953421312 +#define S_1PiB 1125899906842624 +#define S_2PiB 2251799813685248 +#define S_4PiB 4503599627370496 +#define S_8PiB 9007199254740992 +#define S_16PiB 18014398509481984 +#define S_32PiB 36028797018963968 +#define S_64PiB 72057594037927936 +#define S_128PiB 144115188075855872 +#define S_256PiB 288230376151711744 +#define S_512PiB 576460752303423488 +#define S_1EiB 1152921504606846976 +#define S_2EiB 2305843009213693952 +#define S_4EiB 4611686018427387904 +#define S_8EiB 9223372036854775808 + #endif From b6a95c6d10075bb540ce50198bbe22fc0a4392c7 Mon Sep 17 00:00:00 2001 From: Leonid Bloch Date: Wed, 26 Sep 2018 19:04:41 +0300 Subject: [PATCH 13/23] qcow2: Make sizes more humanly readable Signed-off-by: Leonid Bloch Reviewed-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/qcow2.c | 2 +- block/qcow2.h | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index c13153735a..d2c07ce9fe 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -830,7 +830,7 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, } } else { if (!l2_cache_size_set) { - *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE, + *l2_cache_size = MAX(DEFAULT_L2_CACHE_SIZE, (uint64_t)DEFAULT_L2_CACHE_CLUSTERS * s->cluster_size); } diff --git a/block/qcow2.h b/block/qcow2.h index 81b844e936..a8d6f757b1 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -27,6 +27,7 @@ #include "crypto/block.h" #include "qemu/coroutine.h" +#include "qemu/units.h" //#define DEBUG_ALLOC //#define DEBUG_ALLOC2 @@ -43,11 +44,11 @@ /* 8 MB refcount table is enough for 2 PB images at 64k cluster size * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */ -#define QCOW_MAX_REFTABLE_SIZE 0x800000 +#define QCOW_MAX_REFTABLE_SIZE S_8MiB /* 32 MB L1 table is enough for 2 PB images at 64k cluster size * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */ -#define QCOW_MAX_L1_SIZE 0x2000000 +#define QCOW_MAX_L1_SIZE S_32MiB /* Allow for an average of 1k per snapshot table entry, should be plenty of * space for snapshot names and IDs */ @@ -75,9 +76,9 @@ /* Whichever is more */ #define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */ -#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */ +#define DEFAULT_L2_CACHE_SIZE S_1MiB -#define DEFAULT_CLUSTER_SIZE 65536 +#define DEFAULT_CLUSTER_SIZE S_64KiB #define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts" From 657ada52abb85140e56949f522ecec527b256450 Mon Sep 17 00:00:00 2001 From: Leonid Bloch Date: Wed, 26 Sep 2018 19:04:42 +0300 Subject: [PATCH 14/23] qcow2: Avoid duplication in setting the refcount cache size The refcount cache size does not need to be set to its minimum value in read_cache_sizes(), as it is set to at least its minimum value in qcow2_update_options_prepare(). Signed-off-by: Leonid Bloch Reviewed-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/qcow2.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index d2c07ce9fe..cd0053b6ee 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -834,10 +834,9 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, (uint64_t)DEFAULT_L2_CACHE_CLUSTERS * s->cluster_size); } - if (!refcount_cache_size_set) { - *refcount_cache_size = min_refcount_cache; - } } + /* l2_cache_size and refcount_cache_size are ensured to have at least + * their minimum values in qcow2_update_options_prepare() */ if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) || *l2_cache_entry_size > s->cluster_size || From b749562d9822d14ef69c9eaa5f85903010b86c30 Mon Sep 17 00:00:00 2001 From: Leonid Bloch Date: Wed, 26 Sep 2018 19:04:43 +0300 Subject: [PATCH 15/23] qcow2: Assign the L2 cache relatively to the image size Sufficient L2 cache can noticeably improve the performance when using large images with frequent I/O. Previously, unless 'cache-size' was specified and was large enough, the L2 cache was set to a certain size without taking the virtual image size into account. Now, the L2 cache assignment is aware of the virtual size of the image, and will cover the entire image, unless the cache size needed for that is larger than a certain maximum. This maximum is set to 1 MB by default (enough to cover an 8 GB image with the default cluster size) but can be increased or decreased using the 'l2-cache-size' option. This option was previously documented as the *maximum* L2 cache size, and this patch makes it behave as such, instead of as a constant size. Also, the existing option 'cache-size' can limit the sum of both L2 and refcount caches, as previously. Signed-off-by: Leonid Bloch Reviewed-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/qcow2.c | 21 +++++++++------------ block/qcow2.h | 4 +--- docs/qcow2-cache.txt | 15 ++++++++++----- qemu-options.hx | 6 +++--- tests/qemu-iotests/137 | 8 +++++++- tests/qemu-iotests/137.out | 4 +++- 6 files changed, 33 insertions(+), 25 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index cd0053b6ee..589f6c1b1c 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -777,29 +777,35 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, uint64_t *refcount_cache_size, Error **errp) { BDRVQcow2State *s = bs->opaque; - uint64_t combined_cache_size; + uint64_t combined_cache_size, l2_cache_max_setting; bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; + uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; + uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE); combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0); - *l2_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 0); + l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, + DEFAULT_L2_CACHE_MAX_SIZE); *refcount_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); *l2_cache_entry_size = qemu_opt_get_size( opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size); + *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting); + if (combined_cache_size_set) { if (l2_cache_size_set && refcount_cache_size_set) { error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set " "at the same time"); return; - } else if (*l2_cache_size > combined_cache_size) { + } else if (l2_cache_size_set && + (l2_cache_max_setting > combined_cache_size)) { error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed " QCOW2_OPT_CACHE_SIZE); return; @@ -814,9 +820,6 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, } else if (refcount_cache_size_set) { *l2_cache_size = combined_cache_size - *refcount_cache_size; } else { - uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; - uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); - /* Assign as much memory as possible to the L2 cache, and * use the remainder for the refcount cache */ if (combined_cache_size >= max_l2_cache + min_refcount_cache) { @@ -828,12 +831,6 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, *l2_cache_size = combined_cache_size - *refcount_cache_size; } } - } else { - if (!l2_cache_size_set) { - *l2_cache_size = MAX(DEFAULT_L2_CACHE_SIZE, - (uint64_t)DEFAULT_L2_CACHE_CLUSTERS - * s->cluster_size); - } } /* l2_cache_size and refcount_cache_size are ensured to have at least * their minimum values in qcow2_update_options_prepare() */ diff --git a/block/qcow2.h b/block/qcow2.h index a8d6f757b1..2f8c1fd15c 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -74,9 +74,7 @@ /* Must be at least 4 to cover all cases of refcount table growth */ #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ -/* Whichever is more */ -#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */ -#define DEFAULT_L2_CACHE_SIZE S_1MiB +#define DEFAULT_L2_CACHE_MAX_SIZE S_1MiB #define DEFAULT_CLUSTER_SIZE S_64KiB diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt index 7e28b41bd3..750447ea4f 100644 --- a/docs/qcow2-cache.txt +++ b/docs/qcow2-cache.txt @@ -125,8 +125,12 @@ There are a few things that need to be taken into account: - Both caches must have a size that is a multiple of the cluster size (or the cache entry size: see "Using smaller cache sizes" below). - - The default L2 cache size is 8 clusters or 1MB (whichever is more), - and the minimum is 2 clusters (or 2 cache entries, see below). + - The maximum L2 cache size is 1 MB by default (enough for full coverage + of 8 GB images, with the default cluster size). This value can be + modified using the "l2-cache-size" option. QEMU will not use more memory + than needed to hold all of the image's L2 tables, regardless of this max. + value. The minimal L2 cache size is 2 clusters (or 2 cache entries, see + below). - The default (and minimum) refcount cache size is 4 clusters. @@ -184,9 +188,10 @@ Some things to take into account: always uses the cluster size as the entry size. - If the L2 cache is big enough to hold all of the image's L2 tables - (as explained in the "Choosing the right cache sizes" section - earlier in this document) then none of this is necessary and you - can omit the "l2-cache-entry-size" parameter altogether. + (as explained in the "Choosing the right cache sizes" and "How to + configure the cache sizes" sections in this document) then none of + this is necessary and you can omit the "l2-cache-entry-size" + parameter altogether. Reducing the memory usage diff --git a/qemu-options.hx b/qemu-options.hx index 2db6247eff..6eef0f5651 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -736,9 +736,9 @@ The maximum total size of the L2 table and refcount block caches in bytes @item l2-cache-size The maximum size of the L2 table cache in bytes -(default: if cache-size is not defined - 1048576 bytes or 8 clusters, whichever -is larger; otherwise, as large as possible or needed within the cache-size, -while permitting the requested or the minimal refcount cache size) +(default: if cache-size is not specified - 1M; otherwise, as large as possible +within the cache-size, while permitting the requested or the minimal refcount +cache size) @item refcount-cache-size The maximum size of the refcount block cache in bytes diff --git a/tests/qemu-iotests/137 b/tests/qemu-iotests/137 index 87965625d8..19e8597306 100755 --- a/tests/qemu-iotests/137 +++ b/tests/qemu-iotests/137 @@ -109,7 +109,6 @@ $QEMU_IO \ -c "reopen -o cache-size=1M,l2-cache-size=64k,refcount-cache-size=64k" \ -c "reopen -o cache-size=1M,l2-cache-size=2M" \ -c "reopen -o cache-size=1M,refcount-cache-size=2M" \ - -c "reopen -o l2-cache-size=256T" \ -c "reopen -o l2-cache-entry-size=33k" \ -c "reopen -o l2-cache-entry-size=128k" \ -c "reopen -o refcount-cache-size=256T" \ @@ -119,6 +118,13 @@ $QEMU_IO \ -c "reopen -o cache-clean-interval=-1" \ "$TEST_IMG" | _filter_qemu_io +IMGOPTS="cluster_size=256k" _make_test_img 32P +$QEMU_IO \ + -c "reopen -o l2-cache-entry-size=512,l2-cache-size=1T" \ + "$TEST_IMG" | _filter_qemu_io + +_make_test_img 64M + echo echo === Test transaction semantics === echo diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out index 6a2ffc71fd..2c080b72f3 100644 --- a/tests/qemu-iotests/137.out +++ b/tests/qemu-iotests/137.out @@ -19,7 +19,6 @@ Parameter 'lazy-refcounts' expects 'on' or 'off' cache-size, l2-cache-size and refcount-cache-size may not be set at the same time l2-cache-size may not exceed cache-size refcount-cache-size may not exceed cache-size -L2 cache size too big L2 cache entry size must be a power of two between 512 and the cluster size (65536) L2 cache entry size must be a power of two between 512 and the cluster size (65536) Refcount cache size too big @@ -27,6 +26,9 @@ Conflicting values for qcow2 options 'overlap-check' ('constant') and 'overlap-c Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all Cache clean interval too big +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=36028797018963968 +L2 cache size too big +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 === Test transaction semantics === From 80668d0fb735f0839a46278a7d42116089b82816 Mon Sep 17 00:00:00 2001 From: Leonid Bloch Date: Wed, 26 Sep 2018 19:04:44 +0300 Subject: [PATCH 16/23] qcow2: Increase the default upper limit on the L2 cache size The upper limit on the L2 cache size is increased from 1 MB to 32 MB on Linux platforms, and to 8 MB on other platforms (this difference is caused by the ability to set intervals for cache cleaning on Linux platforms only). This is done in order to allow default full coverage with the L2 cache for images of up to 256 GB in size (was 8 GB). Note, that only the needed amount to cover the full image is allocated. The value which is changed here is just the upper limit on the L2 cache size, beyond which it will not grow, even if the size of the image will require it to. Signed-off-by: Leonid Bloch Reviewed-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/qcow2.h | 6 +++++- docs/qcow2-cache.txt | 15 +++++++++------ qemu-options.hx | 6 +++--- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/block/qcow2.h b/block/qcow2.h index 2f8c1fd15c..0f0e3534bf 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -74,7 +74,11 @@ /* Must be at least 4 to cover all cases of refcount table growth */ #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ -#define DEFAULT_L2_CACHE_MAX_SIZE S_1MiB +#ifdef CONFIG_LINUX +#define DEFAULT_L2_CACHE_MAX_SIZE S_32MiB +#else +#define DEFAULT_L2_CACHE_MAX_SIZE S_8MiB +#endif #define DEFAULT_CLUSTER_SIZE S_64KiB diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt index 750447ea4f..1fcc0658b2 100644 --- a/docs/qcow2-cache.txt +++ b/docs/qcow2-cache.txt @@ -125,12 +125,15 @@ There are a few things that need to be taken into account: - Both caches must have a size that is a multiple of the cluster size (or the cache entry size: see "Using smaller cache sizes" below). - - The maximum L2 cache size is 1 MB by default (enough for full coverage - of 8 GB images, with the default cluster size). This value can be - modified using the "l2-cache-size" option. QEMU will not use more memory - than needed to hold all of the image's L2 tables, regardless of this max. - value. The minimal L2 cache size is 2 clusters (or 2 cache entries, see - below). + - The maximum L2 cache size is 32 MB by default on Linux platforms (enough + for full coverage of 256 GB images, with the default cluster size). This + value can be modified using the "l2-cache-size" option. QEMU will not use + more memory than needed to hold all of the image's L2 tables, regardless + of this max. value. + On non-Linux platforms the maximal value is smaller by default (8 MB) and + this difference stems from the fact that on Linux the cache can be cleared + periodically if needed, using the "cache-clean-interval" option (see below). + The minimal L2 cache size is 2 clusters (or 2 cache entries, see below). - The default (and minimum) refcount cache size is 4 clusters. diff --git a/qemu-options.hx b/qemu-options.hx index 6eef0f5651..14aee78c6c 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -736,9 +736,9 @@ The maximum total size of the L2 table and refcount block caches in bytes @item l2-cache-size The maximum size of the L2 table cache in bytes -(default: if cache-size is not specified - 1M; otherwise, as large as possible -within the cache-size, while permitting the requested or the minimal refcount -cache size) +(default: if cache-size is not specified - 32M on Linux platforms, and 8M on +non-Linux platforms; otherwise, as large as possible within the cache-size, +while permitting the requested or the minimal refcount cache size) @item refcount-cache-size The maximum size of the refcount block cache in bytes From 45b4949c7bcdcd998cb42f5c517e80a2657cfd33 Mon Sep 17 00:00:00 2001 From: Leonid Bloch Date: Wed, 26 Sep 2018 19:04:45 +0300 Subject: [PATCH 17/23] qcow2: Resize the cache upon image resizing The caches are now recalculated upon image resizing. This is done because the new default behavior of assigning L2 cache relatively to the image size, implies that the cache will be adapted accordingly after an image resize. Signed-off-by: Leonid Bloch Reviewed-by: Alberto Garcia Signed-off-by: Kevin Wolf --- block/qcow2.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c index 589f6c1b1c..20b5093269 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3418,6 +3418,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, uint64_t old_length; int64_t new_l1_size; int ret; + QDict *options; if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA && prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL) @@ -3642,6 +3643,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, } } + bs->total_sectors = offset / BDRV_SECTOR_SIZE; + /* write updated header.size */ offset = cpu_to_be64(offset); ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), @@ -3652,6 +3655,14 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, } s->l1_vm_state_index = new_l1_size; + + /* Update cache sizes */ + options = qdict_clone_shallow(bs->options); + ret = qcow2_update_options(bs, options, s->flags, errp); + qobject_unref(options); + if (ret < 0) { + goto fail; + } ret = 0; fail: qemu_co_mutex_unlock(&s->lock); From e957b50b8daecfc39a1ac09855b0eacb6edfd328 Mon Sep 17 00:00:00 2001 From: Leonid Bloch Date: Wed, 26 Sep 2018 19:04:46 +0300 Subject: [PATCH 18/23] qcow2: Set the default cache-clean-interval to 10 minutes The default cache-clean-interval is set to 10 minutes, in order to lower the overhead of the qcow2 caches (before the default was 0, i.e. disabled). * For non-Linux platforms the default is kept at 0, because cache-clean-interval is not supported there yet. Signed-off-by: Leonid Bloch Reviewed-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/qcow2.c | 2 +- block/qcow2.h | 4 +++- docs/qcow2-cache.txt | 4 ++-- qapi/block-core.json | 3 ++- qemu-options.hx | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 20b5093269..95e1c98daa 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -944,7 +944,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, /* New interval for cache cleanup timer */ r->cache_clean_interval = qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, - s->cache_clean_interval); + DEFAULT_CACHE_CLEAN_INTERVAL); #ifndef CONFIG_LINUX if (r->cache_clean_interval != 0) { error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL diff --git a/block/qcow2.h b/block/qcow2.h index 0f0e3534bf..ba430316b9 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -76,13 +76,15 @@ #ifdef CONFIG_LINUX #define DEFAULT_L2_CACHE_MAX_SIZE S_32MiB +#define DEFAULT_CACHE_CLEAN_INTERVAL 600 /* seconds */ #else #define DEFAULT_L2_CACHE_MAX_SIZE S_8MiB +/* Cache clean interval is currently available only on Linux, so must be 0 */ +#define DEFAULT_CACHE_CLEAN_INTERVAL 0 #endif #define DEFAULT_CLUSTER_SIZE S_64KiB - #define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts" #define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request" #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot" diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt index 1fcc0658b2..59358b816f 100644 --- a/docs/qcow2-cache.txt +++ b/docs/qcow2-cache.txt @@ -210,8 +210,8 @@ This example removes all unused cache entries every 15 minutes: -drive file=hd.qcow2,cache-clean-interval=900 -If unset, the default value for this parameter is 0 and it disables -this feature. +If unset, the default value for this parameter is 600. Setting it to 0 +disables this feature. Note that this functionality currently relies on the MADV_DONTNEED argument for madvise() to actually free the memory. This is a diff --git a/qapi/block-core.json b/qapi/block-core.json index ac3b48ee54..46dac23d2f 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2895,7 +2895,8 @@ # # @cache-clean-interval: clean unused entries in the L2 and refcount # caches. The interval is in seconds. The default value -# is 0 and it disables this feature (since 2.5) +# is 600, and 0 disables this feature. (since 2.5) +# # @encrypt: Image decryption options. Mandatory for # encrypted images, except when doing a metadata-only # probe of the image. (since 2.10) diff --git a/qemu-options.hx b/qemu-options.hx index 14aee78c6c..52d9d9f06d 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -747,7 +747,7 @@ it which is not used for the L2 cache) @item cache-clean-interval Clean unused entries in the L2 and refcount caches. The interval is in seconds. -The default value is 0 and it disables this feature. +The default value is 600. Setting it to 0 disables this feature. @item pass-discard-request Whether discard requests to the qcow2 device should be forwarded to the data From bd016b912cc68c6f6c68cd5acb2e13126bd9e05c Mon Sep 17 00:00:00 2001 From: Leonid Bloch Date: Wed, 26 Sep 2018 19:04:47 +0300 Subject: [PATCH 19/23] qcow2: Explicit number replaced by a constant Signed-off-by: Leonid Bloch Reviewed-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/qcow2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 95e1c98daa..7277feda13 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1324,7 +1324,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, /* 2^(s->refcount_order - 3) is the refcount width in bytes */ s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3); s->refcount_block_size = 1 << s->refcount_block_bits; - bs->total_sectors = header.size / 512; + bs->total_sectors = header.size / BDRV_SECTOR_SIZE; s->csize_shift = (62 - (s->cluster_bits - 8)); s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; s->cluster_offset_mask = (1LL << s->csize_shift) - 1; @@ -3450,7 +3450,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, goto fail; } - old_length = bs->total_sectors * 512; + old_length = bs->total_sectors * BDRV_SECTOR_SIZE; new_l1_size = size_to_l1(s, offset); if (offset < old_length) { From cb53460b708db3617ab73248374d071d5552c263 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 28 Sep 2018 11:11:50 +0200 Subject: [PATCH 20/23] block-backend: Set werror/rerror defaults in blk_new() Currently, the default values for werror and rerror have to be set explicitly with blk_set_on_error() by the callers of blk_new(). The only caller actually doing this is blockdev_init(), which is called for BlockBackends created using -drive. In particular, anonymous BlockBackends created with -device ...,drive= didn't get the correct default set and instead defaulted to the integer value 0 (= BLOCKDEV_ON_ERROR_REPORT). This is the intended default for rerror anyway, but the default for werror should be BLOCKDEV_ON_ERROR_ENOSPC. Set the defaults in blk_new() instead so that they apply no matter what way the BlockBackend was created. Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Fam Zheng --- block/block-backend.c | 3 +++ tests/qemu-iotests/067.out | 1 + 2 files changed, 4 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 7b1ec5071b..dc0cd57724 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -325,6 +325,9 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) blk->shared_perm = shared_perm; blk_set_enable_write_cache(blk, true); + blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT; + blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; + block_acct_init(&blk->stats); notifier_list_init(&blk->remove_bs_notifiers); diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out index 2e71cff3ce..b10c71db03 100644 --- a/tests/qemu-iotests/067.out +++ b/tests/qemu-iotests/067.out @@ -385,6 +385,7 @@ Testing: -device virtio-scsi -device scsi-cd,id=cd0 { "return": [ { + "io-status": "ok", "device": "", "locked": false, "removable": true, From e3a7b4556ee33feba2b396769a9c8354be06b024 Mon Sep 17 00:00:00 2001 From: Leonid Bloch Date: Sat, 29 Sep 2018 12:54:54 +0300 Subject: [PATCH 21/23] qcow2: Fix cache-clean-interval documentation Fixing cache-clean-interval documentation following the recent change to a default of 600 seconds on supported plarforms (only Linux currently). Signed-off-by: Leonid Bloch Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- docs/qcow2-cache.txt | 20 ++++++++++---------- qapi/block-core.json | 3 ++- qemu-options.hx | 3 ++- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt index 59358b816f..c459bf5dd3 100644 --- a/docs/qcow2-cache.txt +++ b/docs/qcow2-cache.txt @@ -202,18 +202,18 @@ Reducing the memory usage It is possible to clean unused cache entries in order to reduce the memory usage during periods of low I/O activity. -The parameter "cache-clean-interval" defines an interval (in seconds). -All cache entries that haven't been accessed during that interval are -removed from memory. +The parameter "cache-clean-interval" defines an interval (in seconds), +after which all the cache entries that haven't been accessed during the +interval are removed from memory. Setting this parameter to 0 disables this +feature. -This example removes all unused cache entries every 15 minutes: +The following example removes all unused cache entries every 15 minutes: -drive file=hd.qcow2,cache-clean-interval=900 -If unset, the default value for this parameter is 600. Setting it to 0 -disables this feature. +If unset, the default value for this parameter is 600 on platforms which +support this functionality, and is 0 (disabled) on other platforms. -Note that this functionality currently relies on the MADV_DONTNEED -argument for madvise() to actually free the memory. This is a -Linux-specific feature, so cache-clean-interval is not supported in -other systems. +This functionality currently relies on the MADV_DONTNEED argument for +madvise() to actually free the memory. This is a Linux-specific feature, +so cache-clean-interval is not supported on other systems. diff --git a/qapi/block-core.json b/qapi/block-core.json index 46dac23d2f..25b8a0e744 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2895,7 +2895,8 @@ # # @cache-clean-interval: clean unused entries in the L2 and refcount # caches. The interval is in seconds. The default value -# is 600, and 0 disables this feature. (since 2.5) +# is 600 on supporting platforms, and 0 on other +# platforms. 0 disables this feature. (since 2.5) # # @encrypt: Image decryption options. Mandatory for # encrypted images, except when doing a metadata-only diff --git a/qemu-options.hx b/qemu-options.hx index 52d9d9f06d..f139459e80 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -747,7 +747,8 @@ it which is not used for the L2 cache) @item cache-clean-interval Clean unused entries in the L2 and refcount caches. The interval is in seconds. -The default value is 600. Setting it to 0 disables this feature. +The default value is 600 on supporting platforms, and 0 on other platforms. +Setting it to 0 disables this feature. @item pass-discard-request Whether discard requests to the qcow2 device should be forwarded to the data From 90042c6aac52be33f96ea38e3d12238628972927 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 1 Oct 2018 16:27:22 +0200 Subject: [PATCH 22/23] test-replication: Lock AioContext around blk_unref() Recently, the test case has started failing because some job related functions want to drop the AioContext lock even though it hasn't been taken: (gdb) bt #0 0x00007f51c067c9fb in raise () from /lib64/libc.so.6 #1 0x00007f51c067e77d in abort () from /lib64/libc.so.6 #2 0x0000558c9d5dde7b in error_exit (err=, msg=msg@entry=0x558c9d6fe120 <__func__.18373> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36 #3 0x0000558c9d6b5263 in qemu_mutex_unlock_impl (mutex=mutex@entry=0x558c9f3999a0, file=file@entry=0x558c9d6fd36f "util/async.c", line=line@entry=516) at util/qemu-thread-posix.c:96 #4 0x0000558c9d6b0565 in aio_context_release (ctx=ctx@entry=0x558c9f399940) at util/async.c:516 #5 0x0000558c9d5eb3da in job_completed_txn_abort (job=0x558c9f68e640) at job.c:738 #6 0x0000558c9d5eb227 in job_finish_sync (job=0x558c9f68e640, finish=finish@entry=0x558c9d5eb8d0 , errp=errp@entry=0x0) at job.c:986 #7 0x0000558c9d5eb8ee in job_cancel_sync (job=) at job.c:941 #8 0x0000558c9d64d853 in replication_close (bs=) at block/replication.c:148 #9 0x0000558c9d5e5c9f in bdrv_close (bs=0x558c9f41b020) at block.c:3420 #10 bdrv_delete (bs=0x558c9f41b020) at block.c:3629 #11 bdrv_unref (bs=0x558c9f41b020) at block.c:4685 #12 0x0000558c9d62a3f3 in blk_remove_bs (blk=blk@entry=0x558c9f42a7c0) at block/block-backend.c:783 #13 0x0000558c9d62a667 in blk_delete (blk=0x558c9f42a7c0) at block/block-backend.c:402 #14 blk_unref (blk=0x558c9f42a7c0) at block/block-backend.c:457 #15 0x0000558c9d5dfcea in test_secondary_stop () at tests/test-replication.c:478 #16 0x00007f51c1f13178 in g_test_run_suite_internal () from /lib64/libglib-2.0.so.0 #17 0x00007f51c1f1337b in g_test_run_suite_internal () from /lib64/libglib-2.0.so.0 #18 0x00007f51c1f1337b in g_test_run_suite_internal () from /lib64/libglib-2.0.so.0 #19 0x00007f51c1f13552 in g_test_run_suite () from /lib64/libglib-2.0.so.0 #20 0x00007f51c1f13571 in g_test_run () from /lib64/libglib-2.0.so.0 #21 0x0000558c9d5de31f in main (argc=, argv=) at tests/test-replication.c:581 It is yet unclear whether this should really be considered a bug in the test case or whether blk_unref() should work for callers that haven't taken the AioContext lock, but in order to fix the build tests quickly, just take the AioContext lock around blk_unref(). Signed-off-by: Kevin Wolf --- tests/test-replication.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test-replication.c b/tests/test-replication.c index c8165ae954..f085d1993a 100644 --- a/tests/test-replication.c +++ b/tests/test-replication.c @@ -207,13 +207,17 @@ static BlockBackend *start_primary(void) static void teardown_primary(void) { BlockBackend *blk; + AioContext *ctx; /* remove P_ID */ blk = blk_by_name(P_ID); assert(blk); + ctx = blk_get_aio_context(blk); + aio_context_acquire(ctx); monitor_remove_blk(blk); blk_unref(blk); + aio_context_release(ctx); } static void test_primary_read(void) @@ -365,20 +369,27 @@ static void teardown_secondary(void) { /* only need to destroy two BBs */ BlockBackend *blk; + AioContext *ctx; /* remove S_LOCAL_DISK_ID */ blk = blk_by_name(S_LOCAL_DISK_ID); assert(blk); + ctx = blk_get_aio_context(blk); + aio_context_acquire(ctx); monitor_remove_blk(blk); blk_unref(blk); + aio_context_release(ctx); /* remove S_ID */ blk = blk_by_name(S_ID); assert(blk); + ctx = blk_get_aio_context(blk); + aio_context_acquire(ctx); monitor_remove_blk(blk); blk_unref(blk); + aio_context_release(ctx); } static void test_secondary_read(void) From dd353157942a59c21da07da5ac8749a871f7c3ed Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 1 Oct 2018 17:09:18 +0200 Subject: [PATCH 23/23] tests/test-bdrv-drain: Fix too late qemu_event_reset() qemu_event_reset() must be called before the AIO request in a different iothread is submitted. Otherwise the request could be completed before we do the qemu_event_reset() and the test would hang in qemu_event_wait(). Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Tested-by: Max Reitz --- tests/test-bdrv-drain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index c9f29c8b10..ee1740ff06 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -694,6 +694,8 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) s->bh_indirection_ctx = ctx_b; aio_ret = -EINPROGRESS; + qemu_event_reset(&done_event); + if (drain_thread == 0) { acb = blk_aio_preadv(blk, 0, &qiov, 0, test_iothread_aio_cb, &aio_ret); } else { @@ -723,7 +725,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) * but the drain in this thread can continue immediately after * bdrv_dec_in_flight() and aio_ret might be assigned only slightly * later. */ - qemu_event_reset(&done_event); do_drain_begin(drain_type, bs); g_assert_cmpint(bs->in_flight, ==, 0); @@ -743,7 +744,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) } break; case 1: - qemu_event_reset(&done_event); aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data); qemu_event_wait(&done_event); break;