From dacaa16238cc5915a609ddaab4b7f81c4bceb9ae Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 20 Nov 2017 14:59:13 +0100 Subject: [PATCH 1/7] block: Don't use BLK_PERM_CONSISTENT_READ for format probing For format probing, we don't really care whether all of the image content is consistent. The only thing we're looking at is the image header, and specifically the magic numbers that are expected to never change, no matter how inconsistent the guest visible disk content is. Therefore, don't request BLK_PERM_CONSISTENT_READ. This allows format probing to be used, e.g. in the context of 'qemu-img info', even while the guest visible data in the image is inconsistent during a running block job. Signed-off-by: Kevin Wolf Reviewed-by: Fam Zheng --- block.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 6c8ef98dfa..68b724206d 100644 --- a/block.c +++ b/block.c @@ -2579,7 +2579,10 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, goto fail; } if (file_bs != NULL) { - file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + /* Not requesting BLK_PERM_CONSISTENT_READ because we're only + * looking at the header to guess the image format. This works even + * in cases where a guest would not see a consistent state. */ + file = blk_new(0, BLK_PERM_ALL); blk_insert_bs(file, file_bs, &local_err); bdrv_unref(file_bs); if (local_err) { From 1f4ad7d3b8f7162ec0471506d86f57a5d77b8f76 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 20 Nov 2017 15:02:48 +0100 Subject: [PATCH 2/7] block: Don't request I/O permission with BDRV_O_NO_IO 'qemu-img info' makes sense even when BLK_PERM_CONSISTENT_READ cannot be granted because of a block job in a running qemu process. It already sets BDRV_O_NO_IO to indicate that it doesn't access the guest visible data at all. 
Check the BDRV_O_NO_IO flag in blk_new_open(), so that I/O related permissions are not unnecessarily requested and 'qemu-img info' can work even if BLK_PERM_CONSISTENT_READ cannot be granted. Signed-off-by: Kevin Wolf Reviewed-by: Fam Zheng Reviewed-by: Alberto Garcia --- block/block-backend.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 5836cb3087..baef8e7abc 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -299,7 +299,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, { BlockBackend *blk; BlockDriverState *bs; - uint64_t perm; + uint64_t perm = 0; /* blk_new_open() is mainly used in .bdrv_create implementations and the * tools where sharing isn't a concern because the BDS stays private, so we @@ -309,9 +309,11 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, * caller of blk_new_open() doesn't make use of the permissions, but they * shouldn't hurt either. We can still share everything here because the * guest devices will add their own blockers if they can't share. 
*/ - perm = BLK_PERM_CONSISTENT_READ; - if (flags & BDRV_O_RDWR) { - perm |= BLK_PERM_WRITE; + if ((flags & BDRV_O_NO_IO) == 0) { + perm |= BLK_PERM_CONSISTENT_READ; + if (flags & BDRV_O_RDWR) { + perm |= BLK_PERM_WRITE; + } } if (flags & BDRV_O_RESIZE) { perm |= BLK_PERM_RESIZE; From 0b62bcbc61c0cd4c9e7fb3863cf5dc8016b0b4ed Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 20 Nov 2017 15:28:41 +0100 Subject: [PATCH 3/7] block: Add errp to bdrv_snapshot_goto() Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: John Snow --- block/snapshot.c | 23 +++++++++++++++++------ include/block/snapshot.h | 3 ++- qemu-img.c | 6 +++--- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/block/snapshot.c b/block/snapshot.c index be0743abac..75562df4cc 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -177,18 +177,21 @@ int bdrv_snapshot_create(BlockDriverState *bs, } int bdrv_snapshot_goto(BlockDriverState *bs, - const char *snapshot_id) + const char *snapshot_id, + Error **errp) { BlockDriver *drv = bs->drv; int ret, open_ret; int64_t len; if (!drv) { + error_setg(errp, "Block driver is closed"); return -ENOMEDIUM; } len = bdrv_getlength(bs); if (len < 0) { + error_setg_errno(errp, -len, "Cannot get block device size"); return len; } /* We should set all bits in all enabled dirty bitmaps, because dirty @@ -200,13 +203,18 @@ int bdrv_snapshot_goto(BlockDriverState *bs, bdrv_set_dirty(bs, 0, len); if (drv->bdrv_snapshot_goto) { - return drv->bdrv_snapshot_goto(bs, snapshot_id); + ret = drv->bdrv_snapshot_goto(bs, snapshot_id); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to load snapshot"); + } + return ret; } if (bs->file) { BlockDriverState *file; QDict *options = qdict_clone_shallow(bs->options); QDict *file_options; + Error *local_err = NULL; file = bs->file->bs; /* Prevent it from getting deleted when detached from bs */ @@ -220,13 +228,15 @@ int bdrv_snapshot_goto(BlockDriverState *bs, bdrv_unref_child(bs, 
bs->file); bs->file = NULL; - ret = bdrv_snapshot_goto(file, snapshot_id); - open_ret = drv->bdrv_open(bs, options, bs->open_flags, NULL); + ret = bdrv_snapshot_goto(file, snapshot_id, errp); + open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err); QDECREF(options); if (open_ret < 0) { bdrv_unref(file); bs->drv = NULL; - return open_ret; + /* A bdrv_snapshot_goto() error takes precedence */ + error_propagate(errp, local_err); + return ret < 0 ? ret : open_ret; } assert(bs->file->bs == file); @@ -234,6 +244,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, return ret; } + error_setg(errp, "Block driver does not support snapshots"); return -ENOTSUP; } @@ -467,7 +478,7 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs) aio_context_acquire(ctx); if (bdrv_can_snapshot(bs)) { - err = bdrv_snapshot_goto(bs, name); + err = bdrv_snapshot_goto(bs, name, NULL); } aio_context_release(ctx); if (err < 0) { diff --git a/include/block/snapshot.h b/include/block/snapshot.h index e5c0553115..aeb80405e8 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -57,7 +57,8 @@ int bdrv_can_snapshot(BlockDriverState *bs); int bdrv_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); int bdrv_snapshot_goto(BlockDriverState *bs, - const char *snapshot_id); + const char *snapshot_id, + Error **errp); int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id, const char *name, diff --git a/qemu-img.c b/qemu-img.c index 02a6e27beb..68b375f998 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2989,10 +2989,10 @@ static int img_snapshot(int argc, char **argv) break; case SNAPSHOT_APPLY: - ret = bdrv_snapshot_goto(bs, snapshot_name); + ret = bdrv_snapshot_goto(bs, snapshot_name, &err); if (ret) { - error_report("Could not apply snapshot '%s': %d (%s)", - snapshot_name, ret, strerror(-ret)); + error_reportf_err(err, "Could not apply snapshot '%s': ", + snapshot_name); } break; From 
2b624fe079ee7123797f6c685e714795665c0e01 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 20 Nov 2017 15:36:48 +0100 Subject: [PATCH 4/7] block: Add errp to bdrv_all_goto_snapshot() Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Denis V. Lunev Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: John Snow --- block/snapshot.c | 11 ++++++----- include/block/snapshot.h | 3 ++- migration/savevm.c | 6 +++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/block/snapshot.c b/block/snapshot.c index 75562df4cc..8585599579 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -467,9 +467,10 @@ fail: } -int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs) +int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs, + Error **errp) { - int err = 0; + int ret = 0; BlockDriverState *bs; BdrvNextIterator it; @@ -478,10 +479,10 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs) aio_context_acquire(ctx); if (bdrv_can_snapshot(bs)) { - err = bdrv_snapshot_goto(bs, name, NULL); + ret = bdrv_snapshot_goto(bs, name, errp); } aio_context_release(ctx); - if (err < 0) { + if (ret < 0) { bdrv_next_cleanup(&it); goto fail; } @@ -489,7 +490,7 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs) fail: *first_bad_bs = bs; - return err; + return ret; } int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) diff --git a/include/block/snapshot.h b/include/block/snapshot.h index aeb80405e8..9407799941 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -84,7 +84,8 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs); int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs, Error **err); -int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bsd_bs); +int bdrv_all_goto_snapshot(const char *name, 
BlockDriverState **first_bad_bs, + Error **errp); int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs); int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, BlockDriverState *vm_state_bs, diff --git a/migration/savevm.c b/migration/savevm.c index 4a88228614..192f2d82cd 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2346,10 +2346,10 @@ int load_snapshot(const char *name, Error **errp) /* Flush all IO requests so they don't interfere with the new state. */ bdrv_drain_all_begin(); - ret = bdrv_all_goto_snapshot(name, &bs); + ret = bdrv_all_goto_snapshot(name, &bs, errp); if (ret < 0) { - error_setg(errp, "Error %d while activating snapshot '%s' on '%s'", - ret, name, bdrv_get_device_name(bs)); + error_prepend(errp, "Could not load snapshot '%s' on '%s': ", + name, bdrv_get_device_name(bs)); goto err_drain; } From 70a5afedd64c3f0d3b5feae6b40b30f3e8d13e4b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 20 Nov 2017 15:41:31 +0100 Subject: [PATCH 5/7] block: Error out on load_vm with active dirty bitmaps Loading a snapshot invalidates the bitmap. Just marking all blocks dirty is not a useful response in practice, instead the user needs to be aware that we switch to a completely different state. If they are okay with losing the dirty bitmap, they can just explicitly delete it. This effectively reverts commit 04dec3c3ae5. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Denis V. 
Lunev Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: John Snow --- block/snapshot.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/block/snapshot.c b/block/snapshot.c index 8585599579..8cb70dbad5 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -182,25 +182,16 @@ int bdrv_snapshot_goto(BlockDriverState *bs, { BlockDriver *drv = bs->drv; int ret, open_ret; - int64_t len; if (!drv) { error_setg(errp, "Block driver is closed"); return -ENOMEDIUM; } - len = bdrv_getlength(bs); - if (len < 0) { - error_setg_errno(errp, -len, "Cannot get block device size"); - return len; + if (!QLIST_EMPTY(&bs->dirty_bitmaps)) { + error_setg(errp, "Device has active dirty bitmaps"); + return -EBUSY; } - /* We should set all bits in all enabled dirty bitmaps, because dirty - * bitmaps reflect active state of disk and snapshot switch operation - * actually dirties active state. - * TODO: It may make sense not to set all bits but analyze block status of - * current state and destination snapshot and do not set bits corresponding - * to both-zero or both-unallocated areas. */ - bdrv_set_dirty(bs, 0, len); if (drv->bdrv_snapshot_goto) { ret = drv->bdrv_snapshot_goto(bs, snapshot_id); From 50a3efb0f05bcfbe04201d4ebac0b96551a1b551 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Mon, 6 Nov 2017 16:53:45 +0200 Subject: [PATCH 6/7] block: Close a BlockDriverState completely even when bs->drv is NULL bdrv_close() skips much of its logic when bs->drv is NULL. This is fine when we're closing a BlockDriverState that has just been created (because e.g. the initialization process failed), but it's not enough in other cases. For example, when a valid qcow2 image is found to be corrupted then QEMU marks it as such in the file header and then sets bs->drv to NULL in order to make the BlockDriverState unusable. When that BDS is later closed then many of its data structures are not freed (leaking their memory) and none of its children are detached. 
This results in bdrv_close_all() failing to close all BDSs and making this assertion fail when QEMU is being shut down: bdrv_close_all: Assertion `QTAILQ_EMPTY(&all_bdrv_states)' failed. This patch makes bdrv_close() do the full uninitialization process in all cases. This fixes the problem with corrupted images and still works fine with freshly created BDSs. Signed-off-by: Alberto Garcia Message-id: 20171106145345.12038-1-berto@igalia.com Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- block.c | 65 +++++++++++++++++++------------------- tests/qemu-iotests/060 | 13 ++++++++ tests/qemu-iotests/060.out | 12 +++++++ 3 files changed, 57 insertions(+), 33 deletions(-) diff --git a/block.c b/block.c index 68b724206d..9a1a0d1e73 100644 --- a/block.c +++ b/block.c @@ -3198,6 +3198,7 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state) static void bdrv_close(BlockDriverState *bs) { BdrvAioNotifier *ban, *ban_next; + BdrvChild *child, *next; assert(!bs->job); assert(!bs->refcnt); @@ -3207,43 +3208,41 @@ static void bdrv_close(BlockDriverState *bs) bdrv_drain(bs); /* in case flush left pending I/O */ if (bs->drv) { - BdrvChild *child, *next; - bs->drv->bdrv_close(bs); bs->drv = NULL; - - bdrv_set_backing_hd(bs, NULL, &error_abort); - - if (bs->file != NULL) { - bdrv_unref_child(bs, bs->file); - bs->file = NULL; - } - - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - /* TODO Remove bdrv_unref() from drivers' close function and use - * bdrv_unref_child() here */ - if (child->bs->inherits_from == bs) { - child->bs->inherits_from = NULL; - } - bdrv_detach_child(child); - } - - g_free(bs->opaque); - bs->opaque = NULL; - atomic_set(&bs->copy_on_read, 0); - bs->backing_file[0] = '\0'; - bs->backing_format[0] = '\0'; - bs->total_sectors = 0; - bs->encrypted = false; - bs->sg = false; - QDECREF(bs->options); - QDECREF(bs->explicit_options); - bs->options = NULL; - bs->explicit_options = NULL; - QDECREF(bs->full_open_options); - bs->full_open_options = NULL; } + 
bdrv_set_backing_hd(bs, NULL, &error_abort); + + if (bs->file != NULL) { + bdrv_unref_child(bs, bs->file); + bs->file = NULL; + } + + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + /* TODO Remove bdrv_unref() from drivers' close function and use + * bdrv_unref_child() here */ + if (child->bs->inherits_from == bs) { + child->bs->inherits_from = NULL; + } + bdrv_detach_child(child); + } + + g_free(bs->opaque); + bs->opaque = NULL; + atomic_set(&bs->copy_on_read, 0); + bs->backing_file[0] = '\0'; + bs->backing_format[0] = '\0'; + bs->total_sectors = 0; + bs->encrypted = false; + bs->sg = false; + QDECREF(bs->options); + QDECREF(bs->explicit_options); + bs->options = NULL; + bs->explicit_options = NULL; + QDECREF(bs->full_open_options); + bs->full_open_options = NULL; + bdrv_release_named_dirty_bitmaps(bs); assert(QLIST_EMPTY(&bs->dirty_bitmaps)); diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 index 1eca09417b..14797dd3b0 100755 --- a/tests/qemu-iotests/060 +++ b/tests/qemu-iotests/060 @@ -426,6 +426,19 @@ echo '--- Repairing ---' _check_test_img -q -r all _check_test_img -r all +echo +echo "=== Testing the QEMU shutdown with a corrupted image ===" +echo +_make_test_img 64M +poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00" +echo "{'execute': 'qmp_capabilities'} + {'execute': 'human-monitor-command', + 'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}} + {'execute': 'quit'}" \ + | $QEMU -qmp stdio -nographic -nodefaults \ + -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \ + | _filter_qmp | _filter_qemu_io + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index 56f5eb15d8..c4cb7c665e 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -399,4 +399,16 @@ The following inconsistencies were found and repaired: Double checking the fixed image now... No errors were found on the image. 
+ +=== Testing the QEMU shutdown with a corrupted image === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with refcount table); further corruption events will be suppressed +QMP_VERSION +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "none0", "msg": "Preventing invalid write on metadata (overlaps with refcount table)", "offset": 65536, "node-name": "drive", "fatal": true, "size": 65536}} +write failed: Input/output error +{"return": ""} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done From 2807746ff178fe2e62638755693ece57aeeacc05 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 17 Nov 2017 13:04:22 -0600 Subject: [PATCH 7/7] iotests: Fix 176 on 32-bit host The contents of a qcow2 bitmap are rounded up to a size that matches the number of bits available for the granularity, but that granularity differs for 32-bit hosts (our default 64k cluster allows for 2M bitmap coverage per 'long') and 64-bit hosts (4M bitmap per 'long'). If the image is a multiple of 2M but not 4M, then the number of bytes occupied by the array of longs in memory differs between architectures, thus resulting in different SHA256 hashes. Furthermore (but untested by me), if our computation of the SHA256 hash is at all endian-dependent because of how we store data in memory, that's another variable we'd have to account for (ideally, we specified the bitmap stored in qcow2 as fixed-endian on disk, because the same qcow2 file must be usable across any architecture; but that says nothing about how we represent things in memory). But we already have test 165 to validate that bitmaps are stored correctly on disk, while this test is merely testing that the bitmap exists. 
So for this test, the easiest solution is to filter out the actual hash value. Broken in commit 4096974e. Reported-by: Max Reitz Signed-off-by: Eric Blake Message-id: 20171117190422.23626-1-eblake@redhat.com Reviewed-by: John Snow Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- tests/qemu-iotests/176 | 3 ++- tests/qemu-iotests/176.out | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/qemu-iotests/176 b/tests/qemu-iotests/176 index 0f31a20294..b8dc17c592 100755 --- a/tests/qemu-iotests/176 +++ b/tests/qemu-iotests/176 @@ -52,7 +52,8 @@ _supported_os Linux function run_qemu() { $QEMU -nographic -qmp stdio -serial none "$@" 2>&1 \ - | _filter_testdir | _filter_qmp | _filter_qemu + | _filter_testdir | _filter_qmp | _filter_qemu \ + | sed 's/"sha256": ".\{64\}"/"sha256": HASH/' } for reason in snapshot bitmap; do diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out index e62085cd0a..f03a2e776c 100644 --- a/tests/qemu-iotests/176.out +++ b/tests/qemu-iotests/176.out @@ -205,7 +205,7 @@ Offset Length File QMP_VERSION {"return": {}} {"return": {}} -{"return": {"sha256": "e12600978d86b5a453861ae5c17d275204673fef3874b7c3c5433c6153d84706"}} +{"return": {"sha256": HASH}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} @@ -255,7 +255,7 @@ Offset Length File QMP_VERSION {"return": {}} {"return": {}} -{"return": {"sha256": "e12600978d86b5a453861ae5c17d275204673fef3874b7c3c5433c6153d84706"}} +{"return": {"sha256": HASH}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} @@ -305,7 +305,7 @@ Offset Length File QMP_VERSION {"return": {}} {"return": {}} -{"return": {"sha256": "e12600978d86b5a453861ae5c17d275204673fef3874b7c3c5433c6153d84706"}} +{"return": {"sha256": HASH}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": 
{"guest": false}} @@ -352,7 +352,7 @@ Offset Length File QMP_VERSION {"return": {}} {"return": {}} -{"return": {"sha256": "e12600978d86b5a453861ae5c17d275204673fef3874b7c3c5433c6153d84706"}} +{"return": {"sha256": HASH}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done