From 7db1689c35a6f7477c691c31232ec10725ba9dfe Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Fri, 25 Apr 2014 17:02:32 -0400 Subject: [PATCH 01/31] block: fix qemu-img --help invocation This fixes a bug introduced in commit ac1307ab, that caused the '--help' option to not be recognized as a valid command, and not print any help. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- qemu-img.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 968b4c8e83..d884324c8f 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2789,6 +2789,12 @@ int main(int argc, char **argv) { const img_cmd_t *cmd; const char *cmdname; + int c; + int option_index = 0; + static const struct option long_options[] = { + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0} + }; #ifdef CONFIG_POSIX signal(SIGPIPE, SIG_IGN); @@ -2803,15 +2809,20 @@ int main(int argc, char **argv) error_exit("Not enough arguments"); } cmdname = argv[1]; - argc--; argv++; /* find the command */ for(cmd = img_cmds; cmd->name != NULL; cmd++) { if (!strcmp(cmdname, cmd->name)) { - return cmd->handler(argc, argv); + return cmd->handler(argc - 1, argv + 1); } } + c = getopt_long(argc, argv, "h", long_options, &option_index); + + if (c == 'h') { + help(); + } + /* not found */ error_exit("Command not found: %s", cmdname); } From f0e973601250a3f1579ff77230c419c562958fa8 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 28 Apr 2014 10:59:28 +0800 Subject: [PATCH 02/31] mirror: Use DIV_ROUND_UP Although bdrv_getlength() was just called above this, and checked for error, it is better to just use the value we already get, and use DIV_ROUND_UP. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/mirror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index 36f4f8e8bd..95366adb3a 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -329,7 +329,7 @@ static void coroutine_fn mirror_run(void *opaque) return; } - length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity; + length = DIV_ROUND_UP(s->common.len, s->granularity); s->in_flight_bitmap = bitmap_new(length); /* If we have no backing file yet in the destination, we cannot let From 5f6979cba9f63480d38e9deb72b565c6781ac0e8 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Mon, 28 Apr 2014 14:37:18 -0400 Subject: [PATCH 03/31] block: Add '--version' option to qemu-img This allows qemu-img to print out version information, without needing to print the long help wall of text. While there, perform some minor whitespace cleanup, and remove the unused option_index variable in the call to getopt_long(). Reported-by: Eric Blake Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- qemu-img.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index d884324c8f..96f44638b7 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -33,6 +33,9 @@ #include "block/qapi.h" #include +#define QEMU_IMG_VERSION "qemu-img version " QEMU_VERSION \ + ", Copyright (c) 2004-2008 Fabrice Bellard\n" + typedef struct img_cmd_t { const char *name; int (*handler)(int argc, char **argv); @@ -75,7 +78,7 @@ static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...) static void QEMU_NORETURN help(void) { const char *help_msg = - "qemu-img version " QEMU_VERSION ", Copyright (c) 2004-2008 Fabrice Bellard\n" + QEMU_IMG_VERSION "usage: qemu-img command [command options]\n" "QEMU disk image utility\n" "\n" @@ -2790,9 +2793,9 @@ int main(int argc, char **argv) const img_cmd_t *cmd; const char *cmdname; int c; - int option_index = 0; static const struct option long_options[] = { {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'v'}, {0, 0, 0, 0} }; @@ -2811,17 +2814,21 @@ int main(int argc, char **argv) cmdname = argv[1]; /* find the command */ - for(cmd = img_cmds; cmd->name != NULL; cmd++) { + for (cmd = img_cmds; cmd->name != NULL; cmd++) { if (!strcmp(cmdname, cmd->name)) { return cmd->handler(argc - 1, argv + 1); } } - c = getopt_long(argc, argv, "h", long_options, &option_index); + c = getopt_long(argc, argv, "h", long_options, NULL); if (c == 'h') { help(); } + if (c == 'v') { + printf(QEMU_IMG_VERSION); + return 0; + } /* not found */ error_exit("Command not found: %s", cmdname); From e855e4fb7b97f7f605e1f44427b98022e39e6f8f Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Mon, 28 Apr 2014 18:29:54 -0400 Subject: [PATCH 04/31] block: Ignore duplicate or NULL format_name in bdrv_iterate_format Some block drivers have multiple BlockDriver instances with identical format_name fields (e.g. gluster, nbd). Both qemu-img and qemu will use bdrv_iterate_format() to list the supported formats when a help option is invoked. As protocols and formats may register multiple drivers, redundant listings of formats occur (e.g., "Supported formats: ... gluster gluster gluster gluster ... "). Since the list of driver formats will be small, this performs a simple linear search on format_name, and ignores any duplicates. The end result change is that the iterator will no longer receive duplicate string names, nor will it receive NULL pointers. Signed-off-by: Jeff Cody Signed-off-by: Kevin Wolf --- block.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 4745712d22..100fa8679a 100644 --- a/block.c +++ b/block.c @@ -3601,10 +3601,25 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name), void *opaque) { BlockDriver *drv; + int count = 0; + const char **formats = NULL; QLIST_FOREACH(drv, &bdrv_drivers, list) { - it(opaque, drv->format_name); + if (drv->format_name) { + bool found = false; + int i = count; + while (formats && i && !found) { + found = !strcmp(formats[--i], drv->format_name); + } + + if (!found) { + formats = g_realloc(formats, (count + 1) * sizeof(char *)); + formats[count++] = drv->format_name; + it(opaque, drv->format_name); + } + } } + g_free(formats); } /* This function is to find block backend bs */ From 373df5b135b4a54e0abb394e9e703fef3ded093c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 29 Apr 2014 18:09:09 +0800 Subject: [PATCH 05/31] mirror: Fix resource leak when bdrv_getlength fails The direct return will skip releasing of all the resouces at immediate_exit, don't miss that. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/mirror.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 95366adb3a..403714c1ea 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -325,8 +325,8 @@ static void coroutine_fn mirror_run(void *opaque) s->common.len = bdrv_getlength(bs); if (s->common.len <= 0) { - block_job_completed(&s->common, s->common.len); - return; + ret = s->common.len; + goto immediate_exit; } length = DIV_ROUND_UP(s->common.len, s->granularity); From c3cc95bd155d51fc1f9b7259b3bff20a361a1cca Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 29 Apr 2014 18:14:17 +0800 Subject: [PATCH 06/31] mirror: Check for bdrv_get_info result bdrv_get_info could fail. Add check before using the returned value. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/mirror.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index 403714c1ea..1c38aa8f77 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -339,7 +339,10 @@ static void coroutine_fn mirror_run(void *opaque) bdrv_get_backing_filename(s->target, backing_filename, sizeof(backing_filename)); if (backing_filename[0] && !s->target->backing_hd) { - bdrv_get_info(s->target, &bdi); + ret = bdrv_get_info(s->target, &bdi); + if (ret < 0) { + goto immediate_exit; + } if (s->granularity < bdi.cluster_size) { s->buf_size = MAX(s->buf_size, bdi.cluster_size); s->cow_bitmap = bitmap_new(length); From c55752745536712f778e9a0d73a078bdb0360df2 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 10 Apr 2014 16:47:39 -0400 Subject: [PATCH 07/31] block: qemu-iotests - fix image cleanup when using spaced pathnames The _rm_test_img() function in common.rc did not quote the image file, which left droppings in the scratch directory (and performed a potentially unsafe rm -f). This adds the necessary quotes. Reviewed-by: Benoit Canet Signed-off-by: Jeff Cody Signed-off-by: Kevin Wolf --- tests/qemu-iotests/common.rc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 7f00883cad..195c5646aa 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -178,10 +178,10 @@ _rm_test_img() local img=$1 if [ "$IMGFMT" = "vmdk" ]; then # Remove all the extents for vmdk - $QEMU_IMG info $img 2>/dev/null | grep 'filename:' | cut -f 2 -d: \ + "$QEMU_IMG" info "$img" 2>/dev/null | grep 'filename:' | cut -f 2 -d: \ | xargs -I {} rm -f "{}" fi - rm -f $img + rm -f "$img" } _cleanup_test_img() From cc8a7e560ca7932bacabc8a7113ac73976917848 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 10 Apr 2014 16:47:40 -0400 Subject: [PATCH 08/31] block: qemu-iotests: make test 019 and 086 work with spaced pathnames Both tests 019 and 086 need proper quotations to work with pathnames that contain spaces. Reviewed-by: Benoit Canet Reviewed-by: Fam Zheng Signed-off-by: Jeff Cody Signed-off-by: Kevin Wolf --- tests/qemu-iotests/019 | 2 +- tests/qemu-iotests/086 | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/qemu-iotests/019 b/tests/qemu-iotests/019 index e67445c754..f5ecbf5451 100755 --- a/tests/qemu-iotests/019 +++ b/tests/qemu-iotests/019 @@ -96,7 +96,7 @@ mv "$TEST_IMG" "$TEST_IMG.orig" for backing_option in "-B " "-o backing_file="; do echo - echo Testing conversion with $backing_option$TEST_IMG.base | _filter_testdir | _filter_imgfmt + echo Testing conversion with $backing_option"$TEST_IMG.base" | _filter_testdir | _filter_imgfmt echo $QEMU_IMG convert -O $IMGFMT $backing_option"$TEST_IMG.base" "$TEST_IMG.orig" "$TEST_IMG" diff --git a/tests/qemu-iotests/086 b/tests/qemu-iotests/086 index 48fe85bc43..d9a80cf863 100755 --- a/tests/qemu-iotests/086 +++ b/tests/qemu-iotests/086 @@ -51,10 +51,10 @@ function run_qemu_img() size=128M _make_test_img $size -$QEMU_IO -c 'write 0 1M' $TEST_IMG | _filter_qemu_io -$QEMU_IO -c 'write 2M 1M' $TEST_IMG | _filter_qemu_io -$QEMU_IO -c 'write 4M 1M' $TEST_IMG | _filter_qemu_io -$QEMU_IO -c 'write 32M 1M' $TEST_IMG | _filter_qemu_io +$QEMU_IO -c 'write 0 1M' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'write 2M 1M' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'write 4M 1M' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'write 32M 1M' "$TEST_IMG" | _filter_qemu_io $QEMU_IMG convert -p -O $IMGFMT -f $IMGFMT "$TEST_IMG" "$TEST_IMG".base 2>&1 |\ _filter_testdir | sed -e 's/\r/\n/g' From c883db0df9dbf26471c1418a632b216b0c1104f1 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 29 Apr 2014 16:12:30 +0200 Subject: [PATCH 09/31] qcow2: Fix discard discard_single_l2() should not implement its own version of qcow2_get_cluster_type(), but rather rely on this already existing function. By doing so, it will work for compressed clusters as well (which it did not so far). Also, rename "old_offset" to "old_l2_entry", as both are quite different (and the value is indeed of the latter kind). Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 331ab08022..b746429def 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1360,9 +1360,9 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); for (i = 0; i < nb_clusters; i++) { - uint64_t old_offset; + uint64_t old_l2_entry; - old_offset = be64_to_cpu(l2_table[l2_index + i]); + old_l2_entry = be64_to_cpu(l2_table[l2_index + i]); /* * Make sure that a discarded area reads back as zeroes for v3 images @@ -1373,12 +1373,22 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, * TODO We might want to use bdrv_get_block_status(bs) here, but we're * holding s->lock, so that doesn't work today. */ - if (old_offset & QCOW_OFLAG_ZERO) { - continue; - } + switch (qcow2_get_cluster_type(old_l2_entry)) { + case QCOW2_CLUSTER_UNALLOCATED: + if (!bs->backing_hd) { + continue; + } + break; - if ((old_offset & L2E_OFFSET_MASK) == 0 && !bs->backing_hd) { - continue; + case QCOW2_CLUSTER_ZERO: + continue; + + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_COMPRESSED: + break; + + default: + abort(); } /* First remove L2 entries */ @@ -1390,7 +1400,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, } /* Then decrease the refcount */ - qcow2_free_any_clusters(bs, old_offset, 1, type); + qcow2_free_any_clusters(bs, old_l2_entry, 1, type); } ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); From cffb12051a26e5494ed009c99db7858b2aab7099 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 29 Apr 2014 16:12:31 +0200 Subject: [PATCH 10/31] iotests: Discarding compressed clusters on qcow2 Add a test which discards a compressed cluster on qcow2. This should work without any problems. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/090 | 61 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/090.out | 12 ++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 74 insertions(+) create mode 100755 tests/qemu-iotests/090 create mode 100644 tests/qemu-iotests/090.out diff --git a/tests/qemu-iotests/090 b/tests/qemu-iotests/090 new file mode 100755 index 0000000000..8d032f8111 --- /dev/null +++ b/tests/qemu-iotests/090 @@ -0,0 +1,61 @@ +#!/bin/bash +# +# Test for discarding compressed clusters on qcow2 images +# +# Copyright (C) 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +IMG_SIZE=128K + +_make_test_img $IMG_SIZE + +$QEMU_IO -c 'write -c -P 42 0 64k' \ + -c 'write -c -P 23 64k 64k' \ + -c 'discard 64k 64k' \ + "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO -c 'read -P 0 64k 64k' "$TEST_IMG" | _filter_qemu_io + +_check_test_img + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/090.out b/tests/qemu-iotests/090.out new file mode 100644 index 0000000000..2df93e0d97 --- /dev/null +++ b/tests/qemu-iotests/090.out @@ -0,0 +1,12 @@ +QA output created by 090 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +discard 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 864643d256..ae096638b8 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -95,3 +95,4 @@ 086 rw auto quick 087 rw auto 088 rw auto +090 rw auto quick From 0b50cc885381fc6794590dbbb40665e32f9292f8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 11 Apr 2014 21:29:52 +0200 Subject: [PATCH 11/31] block: Create bdrv_inherited_flags() Instead of having bdrv_open_flags() as a function that creates flags for several unrelated places and then adding open-coded flags on top, create a new function that derives the flags for bs->file from the flags for bs. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 100fa8679a..01282c25d7 100644 --- a/block.c +++ b/block.c @@ -774,6 +774,30 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) bs->copy_on_read--; } +/* + * Returns the flags that bs->file should get, based on the given flags for + * the parent BDS + */ +static int bdrv_inherited_flags(int flags) +{ + /* Enable protocol handling, disable format probing for bs->file */ + flags |= BDRV_O_PROTOCOL; + + /* Our block drivers take care to send flushes and respect unmap policy, + * so we can enable both unconditionally on lower layers. */ + flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; + + /* The backing file of a temporary snapshot is read-only */ + if (flags & BDRV_O_SNAPSHOT) { + flags &= ~BDRV_O_RDWR; + } + + /* Clear flags that only apply to the top layer */ + flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + + return flags; +} + static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags | BDRV_O_CACHE_WB; @@ -1333,8 +1357,8 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, assert(file == NULL); ret = bdrv_open_image(&file, filename, options, "file", - bdrv_open_flags(bs, flags | BDRV_O_UNMAP) | - BDRV_O_PROTOCOL, true, &local_err); + bdrv_inherited_flags(flags), + true, &local_err); if (ret < 0) { goto unlink_and_fail; } From 317fc44ef2bfa87e96adecf035ed136ed9d78c8f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 25 Apr 2014 13:27:34 +0200 Subject: [PATCH 12/31] block: Create bdrv_backing_flags() Instead of manipulation flags inline, move the derivation of the flags of a backing file into a new function next to the existing functions that derive flags for bs->file and for the block driver open function. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index 01282c25d7..c6d5ab9cb8 100644 --- a/block.c +++ b/block.c @@ -798,6 +798,21 @@ static int bdrv_inherited_flags(int flags) return flags; } +/* + * Returns the flags that bs->backing_hd should get, based on the given flags + * for the parent BDS + */ +static int bdrv_backing_flags(int flags) +{ + /* backing files always opened read-only */ + flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); + + /* snapshot=on is handled on the top layer */ + flags &= ~BDRV_O_SNAPSHOT; + + return flags; +} + static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags | BDRV_O_CACHE_WB; @@ -1093,7 +1108,7 @@ fail: int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) { char *backing_filename = g_malloc0(PATH_MAX); - int back_flags, ret = 0; + int ret = 0; BlockDriver *back_drv = NULL; Error *local_err = NULL; @@ -1121,14 +1136,10 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) back_drv = bdrv_find_format(bs->backing_format); } - /* backing files always opened read-only */ - back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | - BDRV_O_COPY_ON_READ); - assert(bs->backing_hd == NULL); ret = bdrv_open(&bs->backing_hd, *backing_filename ? backing_filename : NULL, NULL, options, - back_flags, back_drv, &local_err); + bdrv_backing_flags(bs->open_flags), back_drv, &local_err); if (ret < 0) { bs->backing_hd = NULL; bs->open_flags |= BDRV_O_NO_BACKING; From 5669b44de5b3b607a3a4749e0c8c5ddfd723e76b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 11 Apr 2014 21:36:45 +0200 Subject: [PATCH 13/31] block: Remove BDRV_O_COPY_ON_READ for bs->file Copy on Read makes sense on the format level where backing files are implemented, but it's not required on the protocol level. While it shouldn't actively break anything to have COR enabled on both layers, needless serialisation and allocation checks may impact performance. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index c6d5ab9cb8..9f88bcc088 100644 --- a/block.c +++ b/block.c @@ -793,7 +793,7 @@ static int bdrv_inherited_flags(int flags) } /* Clear flags that only apply to the top layer */ - flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); return flags; } From 8bfea15ddac3a0ae832f181653c36e020f24f007 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 11 Apr 2014 19:16:36 +0200 Subject: [PATCH 14/31] block: Unlink temporary files in raw-posix/win32 Instead of having unlink() calls in the generic block layer, where we aren't even guarateed to have a file name, move them to those block drivers that are actually used and that always have a filename. Gets us rid of some #ifdefs as well. The patch also converts bs->is_temporary to a new BDRV_O_TEMPORARY open flag so that it is inherited in the protocol layer and the raw-posix and raw-win32 drivers can unlink the file. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 36 ++++++++++-------------------------- block/raw-posix.c | 5 ++++- block/raw-win32.c | 3 +++ include/block/block.h | 1 + include/block/block_int.h | 1 - 5 files changed, 18 insertions(+), 28 deletions(-) diff --git a/block.c b/block.c index 9f88bcc088..c094075e4b 100644 --- a/block.c +++ b/block.c @@ -808,7 +808,7 @@ static int bdrv_backing_flags(int flags) flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); /* snapshot=on is handled on the top layer */ - flags &= ~BDRV_O_SNAPSHOT; + flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); return flags; } @@ -831,7 +831,7 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) /* * Snapshots should be writable. */ - if (bs->is_temporary) { + if (flags & BDRV_O_TEMPORARY) { open_flags |= BDRV_O_RDWR; } @@ -990,13 +990,6 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, bdrv_refresh_limits(bs); assert(bdrv_opt_mem_align(bs) != 0); assert((bs->request_alignment != 0) || bs->sg); - -#ifndef _WIN32 - if (bs->is_temporary) { - assert(bs->filename[0] != '\0'); - unlink(bs->filename); - } -#endif return 0; free_and_fail: @@ -1267,10 +1260,10 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, Error **errp) qstring_from_str(tmp_filename)); bs_snapshot = bdrv_new("", &error_abort); - bs_snapshot->is_temporary = 1; ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, - bs->open_flags & ~BDRV_O_SNAPSHOT, bdrv_qcow2, &local_err); + (bs->open_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY, + bdrv_qcow2, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; @@ -1371,7 +1364,7 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, bdrv_inherited_flags(flags), true, &local_err); if (ret < 0) { - goto unlink_and_fail; + goto fail; } /* Find the right image format driver */ @@ -1382,7 +1375,7 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, if (!drv) { error_setg(errp, "Invalid driver: '%s'", drvname); ret = -EINVAL; - goto unlink_and_fail; + goto fail; } } @@ -1392,18 +1385,18 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, } else { error_setg(errp, "Must specify either driver or file"); ret = -EINVAL; - goto unlink_and_fail; + goto fail; } } if (!drv) { - goto unlink_and_fail; + goto fail; } /* Open the image */ ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); if (ret < 0) { - goto unlink_and_fail; + goto fail; } if (file && (bs->file != file)) { @@ -1465,14 +1458,10 @@ done: *pbs = bs; return 0; -unlink_and_fail: +fail: if (file != NULL) { bdrv_unref(file); } - if (bs->is_temporary) { - unlink(filename); - } -fail: QDECREF(bs->options); QDECREF(options); bs->options = NULL; @@ -1752,11 +1741,6 @@ void bdrv_close(BlockDriverState *bs) } bs->drv->bdrv_close(bs); g_free(bs->opaque); -#ifdef _WIN32 - if (bs->is_temporary) { - unlink(bs->filename); - } -#endif bs->opaque = NULL; bs->drv = NULL; bs->copy_on_read = 0; diff --git a/block/raw-posix.c b/block/raw-posix.c index 1688e16c64..3ce026db58 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -366,7 +366,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, BDRVRawState *s = bs->opaque; QemuOpts *opts; Error *local_err = NULL; - const char *filename; + const char *filename = NULL; int fd, ret; struct stat st; @@ -446,6 +446,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, ret = 0; fail: + if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { + unlink(filename); + } qemu_opts_del(opts); return ret; } diff --git a/block/raw-win32.c b/block/raw-win32.c index 48cb2c2258..064ea3123c 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -390,6 +390,9 @@ static void raw_close(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; CloseHandle(s->hfile); + if (bs->open_flags & BDRV_O_TEMPORARY) { + unlink(bs->filename); + } } static int raw_truncate(BlockDriverState *bs, int64_t offset) diff --git a/include/block/block.h b/include/block/block.h index c12808a252..467fb2ba0a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -92,6 +92,7 @@ typedef enum { #define BDRV_O_RDWR 0x0002 #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ +#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ #define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */ #define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ diff --git a/include/block/block_int.h b/include/block/block_int.h index cd5bc7308a..9ffcb698d0 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -299,7 +299,6 @@ struct BlockDriverState { char backing_file[1024]; /* if non zero, the image is a diff of this file image */ char backing_format[16]; /* if non-zero and backing_file exists */ - int is_temporary; BlockDriverState *backing_hd; BlockDriverState *file; From 7e3d98dd31b6de53923bfb0f04b23b42a94f3829 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 25 Apr 2014 12:36:07 +0200 Subject: [PATCH 15/31] Revert "block: another bdrv_append fix" This reverts commit 3a389e7926750cba5c83f662b1941888b2bebc04. The commit was wrong and what it tried to fix just works today without any change. What the commit tried to fix: When creating live snapshots, the new image file is opened with BDRV_O_NO_BACKING because the whole backing chain is already opened. It is then appended to the chain using bdrv_append(). The result of this was that the image had a backing file, but BDRV_O_NO_BACKING was still set. This is obviously inconsistent. There used to be some places in qemu that closed and image and then opened it again, with its old flags (a bdrv_open()/close() sequence involves reopening the whole backing file chain, too). In this case the BDRV_O_NO_BACKING flag meant that the backing chain wasn't reopened and only the top layer was left. (Most, but not all of these places are replaced by bdrv_reopen() today, which doesn't touch the backing files at all.) Other places that looked at bs->open_flags weren't interested in BDRV_O_NO_BACKING, so no breakage there. What it actually did: The commit moved the BDRV_O_NO_BACKING away to the backing file. Because the bdrv_open()/close() sequences only looked at the flags of the top level BlockDriverState and used it for the whole chain, the flag didn't hurt there any more. Obviously, it is still inconsistent because the backing file may have another backing file, but without practical impact. At the same time, it swapped all other flags. This is practically irrelevant as long as live snapshots only allow opening the new layer with the same flags as the old top layer. It still doesn't make any sense, and it is a time bomb that explodes as soon as the flags can differ. bdrv_append_temp_snapshot() is such a case: It adds the new flag BDRV_O_TEMPORARY for the temporary snapshot. The swapping of commit 3a389e79 results in the following nonsensical configuration: bs->open_flags: BDRV_O_TEMPORARY cleared bs->file->open_flags: BDRV_O_TEMPORARY set bs->backing_hd->open_flags: BDRV_O_TEMPORARY set bs->backing_hd->file->open_flags: BDRV_O_TEMPORARY cleared We're still lucky because the format layer ignores the flag and the protocol layer happens to get the right value, but sooner or later this is bound to go wrong... What the right fix would have been: Simply clear the BDRV_O_NO_BACKING flag when the BlockDriverState is appended to an existing backing file chain, because now it does have a backing file. Commit 4ddc07ca already implemented this silently in bdrv_append(), so we don't have to come up with a new fix. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block.c b/block.c index c094075e4b..9f6f07e75c 100644 --- a/block.c +++ b/block.c @@ -1864,7 +1864,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, BlockDriverState *bs_src) { /* move some fields that need to stay attached to the device */ - bs_dest->open_flags = bs_src->open_flags; /* dev info */ bs_dest->dev_ops = bs_src->dev_ops; From f1f25a2e2ea0cd3cdc7d01f2e0afbc4aef6fb853 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 25 Apr 2014 19:04:55 +0200 Subject: [PATCH 16/31] block: Fix open_flags in bdrv_reopen() Use the same function as bdrv_open() for determining what the right flags for bs->file are. Without doing this, a reopen means that bs->file loses BDRV_O_CACHE_WB or BDRV_O_UNMAP if bs doesn't have it as well. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 9f6f07e75c..b749d31ad4 100644 --- a/block.c +++ b/block.c @@ -1525,8 +1525,11 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, QSIMPLEQ_INIT(bs_queue); } + /* bdrv_open() masks this flag out */ + flags &= ~BDRV_O_PROTOCOL; + if (bs->file) { - bdrv_reopen_queue(bs_queue, bs->file, flags); + bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); } bs_entry = g_new0(BlockReopenQueueEntry, 1); From 35d0d40a034b2392f48f91e4e00c8c94e3526a19 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 29 Apr 2014 18:32:25 +0200 Subject: [PATCH 17/31] block: Use error_abort in bdrv_image_info_specific_dump() Currently, bdrv_image_info_specific_dump() uses an error variable for visit_type_ImageInfoSpecific, but ignores the result. As this function is used here with an output visitor to transform the ImageInfoSpecific object to a generic QDict, an error should actually be impossible. It is however better to assert that this is indeed the case. This is done by this patch using error_abort instead of an unused local Error variable. Signed-off-by: Max Reitz Reported-by: Markus Armbruster Signed-off-by: Kevin Wolf --- block/qapi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/qapi.c b/block/qapi.c index 8f2b4dbe7d..af114452e0 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -532,12 +532,11 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation, void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f, ImageInfoSpecific *info_spec) { - Error *local_err = NULL; QmpOutputVisitor *ov = qmp_output_visitor_new(); QObject *obj, *data; visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), &info_spec, NULL, - &local_err); + &error_abort); obj = qmp_output_get_qobject(ov); assert(qobject_type(obj) == QTYPE_QDICT); data = qdict_get(qobject_to_qdict(obj), "data"); From 91f827dcff61c3e007def4c949d3a8310954b85e Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 29 Apr 2014 19:03:11 +0200 Subject: [PATCH 18/31] qcow2: Avoid overflow in alloc_clusters_noref() alloc_clusters_noref() stores the cluster index in a uint64_t. However, offsets are often represented as int64_t (as for example the return value of alloc_clusters_noref() itself demonstrates). Therefore, we should make sure all offsets in the allocated range of clusters are representable using int64_t without overflows. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index a37ee45016..d2cb6a8775 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -653,6 +653,13 @@ retry: goto retry; } } + + /* Make sure that all offsets in the "allocated" range are representable + * in an int64_t */ + if (s->free_cluster_index - 1 > (INT64_MAX >> s->cluster_bits)) { + return -EFBIG; + } + #ifdef DEBUG_ALLOC2 fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n", size, From 521b2b5df0ccad764cf95164c6e428f855067a6f Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 29 Apr 2014 19:03:12 +0200 Subject: [PATCH 19/31] block: Use correct width in format strings Instead of blindly relying on a normal integer having a width of 32 bits (which is a pretty good assumption, but we should not rely on it if there is no need), use the correct format string macros. This does not touch DEBUG output. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/cow.c | 2 +- block/dmg.c | 8 ++++---- block/qcow.c | 3 ++- block/qcow2.c | 12 +++++++----- block/sheepdog.c | 6 +++--- block/vdi.c | 21 +++++++++++---------- 6 files changed, 28 insertions(+), 24 deletions(-) diff --git a/block/cow.c b/block/cow.c index 30deb88deb..164759f3a3 100644 --- a/block/cow.c +++ b/block/cow.c @@ -82,7 +82,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags, if (be32_to_cpu(cow_header.version) != COW_VERSION) { char version[64]; snprintf(version, sizeof(version), - "COW version %d", cow_header.version); + "COW version %" PRIu32, cow_header.version); error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, bs->device_name, "cow", version); ret = -ENOTSUP; diff --git a/block/dmg.c b/block/dmg.c index 856402e1f2..1e153cd76d 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -248,8 +248,8 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, offset += 8; if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) { - error_report("sector count %" PRIu64 " for chunk %u is " - "larger than max (%u)", + error_report("sector count %" PRIu64 " for chunk %" PRIu32 + " is larger than max (%u)", s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX); ret = -EINVAL; goto fail; @@ -269,8 +269,8 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, offset += 8; if (s->lengths[i] > DMG_LENGTHS_MAX) { - error_report("length %" PRIu64 " for chunk %u is larger " - "than max (%u)", + error_report("length %" PRIu64 " for chunk %" PRIu32 + " is larger than max (%u)", s->lengths[i], i, DMG_LENGTHS_MAX); ret = -EINVAL; goto fail; diff --git a/block/qcow.c b/block/qcow.c index d5a7d5fd1e..937dd6dd1c 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -119,7 +119,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, } if (header.version != QCOW_VERSION) { char version[64]; - snprintf(version, sizeof(version), "QCOW version %d", header.version); + snprintf(version, sizeof(version), "QCOW version %" PRIu32, + header.version); error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, bs->device_name, "qcow", version); ret = -ENOTSUP; diff --git a/block/qcow2.c b/block/qcow2.c index e903d971c3..a4b97e8263 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -124,8 +124,9 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, case QCOW2_EXT_MAGIC_BACKING_FORMAT: if (ext.len >= sizeof(bs->backing_format)) { - error_setg(errp, "ERROR: ext_backing_format: len=%u too large" - " (>=%zu)", ext.len, sizeof(bs->backing_format)); + error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32 + " too large (>=%zu)", ext.len, + sizeof(bs->backing_format)); return 2; } ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len); @@ -483,7 +484,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } if (header.version < 2 || header.version > 3) { - report_unsupported(bs, errp, "QCOW version %d", header.version); + report_unsupported(bs, errp, "QCOW version %" PRIu32, header.version); ret = -ENOTSUP; goto fail; } @@ -493,7 +494,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, /* Initialise cluster size */ if (header.cluster_bits < MIN_CLUSTER_BITS || header.cluster_bits > MAX_CLUSTER_BITS) { - error_setg(errp, "Unsupported cluster size: 2^%i", header.cluster_bits); + error_setg(errp, "Unsupported cluster size: 2^%" PRIu32, + header.cluster_bits); ret = -EINVAL; goto fail; } @@ -591,7 +593,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, s->refcount_order = header.refcount_order; if (header.crypt_method > QCOW_CRYPT_AES) { - error_setg(errp, "Unsupported encryption method: %i", + error_setg(errp, "Unsupported encryption method: %" PRIu32, header.crypt_method); ret = -EINVAL; goto fail; diff --git a/block/sheepdog.c b/block/sheepdog.c index 0eb33ee80e..2c3fb016a8 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1099,7 +1099,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename, } if (rsp->result != SD_RES_SUCCESS) { - error_report("cannot get vdi info, %s, %s %d %s", + error_report("cannot get vdi info, %s, %s %" PRIu32 " %s", sd_strerror(rsp->result), filename, snapid, tag); if (rsp->result == SD_RES_NO_VDI) { ret = -ENOENT; @@ -2316,8 +2316,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) sn_tab[found].vm_state_size = inode.vm_state_size; sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec; - snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u", - inode.snap_id); + snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), + "%" PRIu32, inode.snap_id); pstrcpy(sn_tab[found].name, MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)), inode.tag); diff --git a/block/vdi.c b/block/vdi.c index 820cd376b3..81faa25f8a 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -408,34 +408,35 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, } if (header.signature != VDI_SIGNATURE) { - error_setg(errp, "Image not in VDI format (bad signature %08x)", header.signature); + error_setg(errp, "Image not in VDI format (bad signature %08" PRIx32 + ")", header.signature); ret = -EINVAL; goto fail; } else if (header.version != VDI_VERSION_1_1) { - error_setg(errp, "unsupported VDI image (version %u.%u)", - header.version >> 16, header.version & 0xffff); + error_setg(errp, "unsupported VDI image (version %" PRIu32 ".%" PRIu32 + ")", header.version >> 16, header.version & 0xffff); ret = -ENOTSUP; goto fail; } else if (header.offset_bmap % SECTOR_SIZE != 0) { /* We only support block maps which start on a sector boundary. */ error_setg(errp, "unsupported VDI image (unaligned block map offset " - "0x%x)", header.offset_bmap); + "0x%" PRIx32 ")", header.offset_bmap); ret = -ENOTSUP; goto fail; } else if (header.offset_data % SECTOR_SIZE != 0) { /* We only support data blocks which start on a sector boundary. */ - error_setg(errp, "unsupported VDI image (unaligned data offset 0x%x)", - header.offset_data); + error_setg(errp, "unsupported VDI image (unaligned data offset 0x%" + PRIx32 ")", header.offset_data); ret = -ENOTSUP; goto fail; } else if (header.sector_size != SECTOR_SIZE) { - error_setg(errp, "unsupported VDI image (sector size %u is not %u)", - header.sector_size, SECTOR_SIZE); + error_setg(errp, "unsupported VDI image (sector size %" PRIu32 + " is not %u)", header.sector_size, SECTOR_SIZE); ret = -ENOTSUP; goto fail; } else if (header.block_size != DEFAULT_CLUSTER_SIZE) { - error_setg(errp, "unsupported VDI image (block size %u is not %u)", - header.block_size, DEFAULT_CLUSTER_SIZE); + error_setg(errp, "unsupported VDI image (block size %" PRIu32 + " is not %u)", header.block_size, DEFAULT_CLUSTER_SIZE); ret = -ENOTSUP; goto fail; } else if (header.disk_size > From a49139af77850d64d74f9ffe43cabe7aa4f19de0 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 29 Apr 2014 19:03:13 +0200 Subject: [PATCH 20/31] qcow2: Catch bdrv_getlength() error The call to bdrv_getlength() from qcow2_check_refcounts() may result in an error. Check this and abort if necessary. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index d2cb6a8775..e79895d11d 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1487,6 +1487,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, int ret; size = bdrv_getlength(bs->file); + if (size < 0) { + res->check_errors++; + return size; + } + nb_clusters = size_to_clusters(s, size); if (nb_clusters > INT_MAX) { res->check_errors++; From b93f995081cc32e56071fef179161d2907d0491e Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 29 Apr 2014 19:03:14 +0200 Subject: [PATCH 21/31] qcow2: Check min_size in qcow2_grow_l1_table() First, new_l1_size is an int64_t, whereas min_size is a uint64_t. Therefore, during the loop which adjusts new_l1_size until it equals or exceeds min_size, new_l1_size might overflow and become negative. The comparison in the loop condition however will take it as an unsigned value (because min_size is unsigned) and therefore recognize it as exceeding min_size. Therefore, the loop is left with a negative new_l1_size, which is not correct. This could be fixed by making new_l1_size uint64_t. On the other hand, however, by doing this, the while loop may take forever. If min_size is e.g. UINT64_MAX, it will take new_l1_size probably multiple overflows to reach the exact same value (if it reaches it at all). Then, right after the loop, new_l1_size will be recognized as being too big anyway. Both problems require a ridiculously high min_size value, which is very unlikely to occur; but both problems are also simply avoided by checking whether min_size is sane before calculating new_l1_size (which should still be checked separately, though). Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index b746429def..76d2bcf63a 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -42,6 +42,13 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, if (min_size <= s->l1_size) return 0; + /* Do a sanity check on min_size before trying to calculate new_l1_size + * (this prevents overflows during the while loop for the calculation of + * new_l1_size) */ + if (min_size > INT_MAX / sizeof(uint64_t)) { + return -EFBIG; + } + if (exact_size) { new_l1_size = min_size; } else { From e1b42f456fad6e797eaf795ed2e400c4e47d5eb4 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 29 Apr 2014 19:03:15 +0200 Subject: [PATCH 22/31] block/bochs: Fix error handling for seek_to_sector() Currently, seek_to_sector() returns -1 both for errors and unallocated sectors, resulting in silent errors. As 0 is an invalid offset of data clusters (bitmap_offset is greater than 0 because s->data_offset is greater than 0), just return 0 for unallocated sectors and -errno in case of error. This should then be propagated by bochs_read(), the sole user of seek_to_sector(). That function also has a case of "return -1 in case of error", which is fixed by this patch as well. bochs_read() is called by bochs_co_read() which passes the return value through, therefore it is indeed correct for bochs_read() to return -errno. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/bochs.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/block/bochs.c b/block/bochs.c index eacf956e7d..eba23df335 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -187,13 +187,14 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num) uint64_t offset = sector_num * 512; uint64_t extent_index, extent_offset, bitmap_offset; char bitmap_entry; + int ret; // seek to sector extent_index = offset / s->extent_size; extent_offset = (offset % s->extent_size) / 512; if (s->catalog_bitmap[extent_index] == 0xffffffff) { - return -1; /* not allocated */ + return 0; /* not allocated */ } bitmap_offset = s->data_offset + @@ -201,13 +202,14 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num) (s->extent_blocks + s->bitmap_blocks)); /* read in bitmap for current extent */ - if (bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8), - &bitmap_entry, 1) != 1) { - return -1; + ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8), + &bitmap_entry, 1); + if (ret < 0) { + return ret; } if (!((bitmap_entry >> (extent_offset % 8)) & 1)) { - return -1; /* not allocated */ + return 0; /* not allocated */ } return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset)); @@ -220,13 +222,16 @@ static int bochs_read(BlockDriverState *bs, int64_t sector_num, while (nb_sectors > 0) { int64_t block_offset = seek_to_sector(bs, sector_num); - if (block_offset >= 0) { + if (block_offset < 0) { + return block_offset; + } else if (block_offset > 0) { ret = bdrv_pread(bs->file, block_offset, buf, 512); - if (ret != 512) { - return -1; + if (ret < 0) { + return ret; } - } else + } else { memset(buf, 0, 512); + } nb_sectors--; sector_num++; buf += 512; From 0549ea8b6d3ed4eba9a3bd0abfaed3af5de69873 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 29 Apr 2014 19:03:16 +0200 Subject: [PATCH 23/31] block/vdi: Error out immediately in vdi_create() Currently, if an error occurs during the part of vdi_create() which actually writes the image, the function stores -errno, but continues anyway. Instead of trying to write data which (if it can be written at all) does not make any sense without the operations before succeeding (e.g., writing the image header), just error out immediately. Signed-off-by: Max Reitz Reviewed-by: Stefan Weil Signed-off-by: Kevin Wolf --- block/vdi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/vdi.c b/block/vdi.c index 81faa25f8a..27737af555 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -756,6 +756,7 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options, vdi_header_to_le(&header); if (write(fd, &header, sizeof(header)) < 0) { result = -errno; + goto close_and_exit; } if (bmap_size > 0) { @@ -769,6 +770,8 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options, } if (write(fd, bmap, bmap_size) < 0) { result = -errno; + g_free(bmap); + goto close_and_exit; } g_free(bmap); } @@ -776,10 +779,12 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options, if (image_type == VDI_TYPE_STATIC) { if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) { result = -errno; + goto close_and_exit; } } - if (close(fd) < 0) { +close_and_exit: + if ((close(fd) < 0) && !result) { result = -errno; } From f6246509be602369cfa1250965e1e62a0c62c99f Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Tue, 29 Apr 2014 16:03:25 +0100 Subject: [PATCH 24/31] curl: Fix long line Signed-off-by: Matthew Booth Tested-by: Richard W.M. Jones Signed-off-by: Kevin Wolf --- block/curl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/curl.c b/block/curl.c index 6731d2891f..0404dbdfd6 100644 --- a/block/curl.c +++ b/block/curl.c @@ -256,7 +256,8 @@ static void curl_multi_read(BDRVCURLState *s) case CURLMSG_DONE: { CURLState *state = NULL; - curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, (char**)&state); + curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, + (char **)&state); /* ACBs for successful messages get completed in curl_read_cb */ if (msg->data.result != CURLE_OK) { From 9e550b326076caf4a1756b77eee95ad60b4adc27 Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Tue, 29 Apr 2014 16:03:26 +0100 Subject: [PATCH 25/31] curl: Remove unnecessary use of goto This isn't any of the usually acceptable uses of goto. Signed-off-by: Matthew Booth Tested-by: Richard W.M. Jones Signed-off-by: Kevin Wolf --- block/curl.c | 53 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/block/curl.c b/block/curl.c index 0404dbdfd6..e97f4499c9 100644 --- a/block/curl.c +++ b/block/curl.c @@ -343,39 +343,38 @@ static CURLState *curl_init_state(BDRVCURLState *s) } } while(!state); - if (state->curl) - goto has_curl; + if (!state->curl) { + state->curl = curl_easy_init(); + if (!state->curl) { + return NULL; + } + curl_easy_setopt(state->curl, CURLOPT_URL, s->url); + curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, 5); + curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, + (void *)curl_read_cb); + curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state); + curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state); + curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1); + curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1); + curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg); + curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1); - state->curl = curl_easy_init(); - if (!state->curl) - return NULL; - curl_easy_setopt(state->curl, CURLOPT_URL, s->url); - curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, 5); - curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, (void *)curl_read_cb); - curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state); - curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state); - curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1); - curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1); - curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1); - curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg); - curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1); - - /* Restrict supported protocols to avoid security issues in the more - * obscure protocols. For example, do not allow POP3/SMTP/IMAP see - * CVE-2013-0249. - * - * Restricting protocols is only supported from 7.19.4 upwards. - */ + /* Restrict supported protocols to avoid security issues in the more + * obscure protocols. For example, do not allow POP3/SMTP/IMAP see + * CVE-2013-0249. + * + * Restricting protocols is only supported from 7.19.4 upwards. + */ #if LIBCURL_VERSION_NUM >= 0x071304 - curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS); - curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS); + curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS); + curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS); #endif #ifdef DEBUG_VERBOSE - curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1); + curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1); #endif - -has_curl: + } state->s = s; From 38bbc0a580f9f10570b1d1b5d3e92f0e6feb2970 Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Tue, 29 Apr 2014 16:03:27 +0100 Subject: [PATCH 26/31] curl: Fix return from curl_read_cb with invalid state A curl write callback is supposed to return the number of bytes it handled. curl_read_cb would have erroneously reported it had handled all bytes in the event that the internal curl state was invalid. Signed-off-by: Matthew Booth Tested-by: Richard W.M. Jones Signed-off-by: Kevin Wolf --- block/curl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/curl.c b/block/curl.c index e97f4499c9..26c9cac505 100644 --- a/block/curl.c +++ b/block/curl.c @@ -155,7 +155,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) DPRINTF("CURL: Just reading %zd bytes\n", realsize); if (!s || !s->orig_buf) - goto read_end; + return 0; if (s->buf_off >= s->buf_len) { /* buffer full, read nothing */ @@ -180,7 +180,6 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) } } -read_end: return realsize; } From e466183718bfaaf347a3c02499473068a0072114 Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Tue, 29 Apr 2014 16:03:28 +0100 Subject: [PATCH 27/31] curl: Remove erroneous sleep waiting for curl completion The driver will not start more than a fixed number of curl sessions. If it needs more, it must wait for the completion of an existing one. The driver was sleeping, which will prevent the main loop from running, and therefore the event it's waiting on. It was also directly calling its internal handler rather than waiting on existing registered handlers to be called from the main loop. This change causes it simply to wait for a period of time whilst allowing the main loop to execute. Signed-off-by: Matthew Booth Tested-by: Richard W.M. Jones Signed-off-by: Kevin Wolf --- block/curl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/curl.c b/block/curl.c index 26c9cac505..50bd05f204 100644 --- a/block/curl.c +++ b/block/curl.c @@ -337,8 +337,7 @@ static CURLState *curl_init_state(BDRVCURLState *s) break; } if (!state) { - g_usleep(100); - curl_multi_do(s); + qemu_aio_wait(); } } while(!state); From b69cdef876340624bb40a2054d14f298471a40a6 Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Tue, 29 Apr 2014 16:03:29 +0100 Subject: [PATCH 28/31] curl: Remove unnecessary explicit calls to internal event handler Remove calls to curl_multi_do where the relevant handles are already registered to the event loop. Ensure that we kick off socket handling with CURL_SOCKET_TIMEOUT after adding a new handle. Signed-off-by: Matthew Booth Tested-by: Richard W.M. Jones Signed-off-by: Kevin Wolf --- block/curl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/curl.c b/block/curl.c index 50bd05f204..fd2756ec50 100644 --- a/block/curl.c +++ b/block/curl.c @@ -535,7 +535,6 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s); curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb); #endif - curl_multi_do(s); qemu_opts_del(opts); return 0; @@ -564,6 +563,7 @@ static const AIOCBInfo curl_aiocb_info = { static void curl_readv_bh_cb(void *p) { CURLState *state; + int running; CURLAIOCB *acb = p; BDRVCURLState *s = acb->common.bs->opaque; @@ -612,8 +612,9 @@ static void curl_readv_bh_cb(void *p) curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range); curl_multi_add_handle(s->multi, state->curl); - curl_multi_do(s); + /* Tell curl it needs to kick things off */ + curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); } static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs, From 838ef602498b8d1985a231a06f5e328e2946a81d Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Tue, 29 Apr 2014 16:03:30 +0100 Subject: [PATCH 29/31] curl: Eliminate unnecessary use of curl_multi_socket_all curl_multi_socket_all is a deprecated catch-all which checks for activities on all open curl sockets. We have enough information from the event loop to check only the sockets with activity. This change removes use of curl_multi_socket_all in favour of curl_multi_socket_action called with the relevant handle. At the same time, it also ensures that the driver only checks for completion of read operations after reading from a socket, rather than both reading and writing. Signed-off-by: Matthew Booth Tested-by: Richard W.M. Jones Signed-off-by: Kevin Wolf --- block/curl.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/block/curl.c b/block/curl.c index fd2756ec50..f3a4445972 100644 --- a/block/curl.c +++ b/block/curl.c @@ -71,6 +71,7 @@ typedef struct CURLState struct BDRVCURLState *s; CURLAIOCB *acb[CURL_NUM_ACB]; CURL *curl; + curl_socket_t sock_fd; char *orig_buf; size_t buf_start; size_t buf_off; @@ -92,6 +93,7 @@ typedef struct BDRVCURLState { static void curl_clean_state(CURLState *s); static void curl_multi_do(void *arg); +static void curl_multi_read(void *arg); #ifdef NEED_CURL_TIMER_CALLBACK static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque) @@ -113,16 +115,20 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque) static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, void *s, void *sp) { + CURLState *state = NULL; + curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state); + state->sock_fd = fd; + DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd); switch (action) { case CURL_POLL_IN: - qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, s); + qemu_aio_set_fd_handler(fd, curl_multi_read, NULL, state); break; case CURL_POLL_OUT: - qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, s); + qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, state); break; case CURL_POLL_INOUT: - qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do, s); + qemu_aio_set_fd_handler(fd, curl_multi_read, curl_multi_do, state); break; case CURL_POLL_REMOVE: qemu_aio_set_fd_handler(fd, NULL, NULL, NULL); @@ -236,7 +242,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, return FIND_RET_NONE; } -static void curl_multi_read(BDRVCURLState *s) +static void curl_multi_check_completion(BDRVCURLState *s) { int msgs_in_queue; @@ -286,19 +292,26 @@ static void curl_multi_read(BDRVCURLState *s) static void curl_multi_do(void *arg) { - BDRVCURLState *s = (BDRVCURLState *)arg; + CURLState *s = (CURLState *)arg; int running; int r; - if (!s->multi) { + if (!s->s->multi) { return; } do { - r = curl_multi_socket_all(s->multi, &running); + r = curl_multi_socket_action(s->s->multi, s->sock_fd, 0, &running); } while(r == CURLM_CALL_MULTI_PERFORM); - curl_multi_read(s); +} + +static void curl_multi_read(void *arg) +{ + CURLState *s = (CURLState *)arg; + + curl_multi_do(arg); + curl_multi_check_completion(s->s); } static void curl_multi_timeout_do(void *arg) @@ -313,7 +326,7 @@ static void curl_multi_timeout_do(void *arg) curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); - curl_multi_read(s); + curl_multi_check_completion(s); #else abort(); #endif @@ -529,7 +542,6 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, // initialize the multi interface! s->multi = curl_multi_init(); - curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s); curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb); #ifdef NEED_CURL_TIMER_CALLBACK curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s); From 1f2cead324436da25c3607f4b957f0198a01fc01 Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Tue, 29 Apr 2014 16:03:31 +0100 Subject: [PATCH 30/31] curl: Ensure all informationals are checked for completion According to the documentation, the correct way to ensure all informationals have been returned by curl_multi_info_read is to loop until it returns NULL. Signed-off-by: Matthew Booth Tested-by: Richard W.M. Jones Signed-off-by: Kevin Wolf --- block/curl.c | 49 +++++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/block/curl.c b/block/curl.c index f3a4445972..16e7db8eea 100644 --- a/block/curl.c +++ b/block/curl.c @@ -248,46 +248,39 @@ static void curl_multi_check_completion(BDRVCURLState *s) /* Try to find done transfers, so we can free the easy * handle again. */ - do { + for (;;) { CURLMsg *msg; msg = curl_multi_info_read(s->multi, &msgs_in_queue); + /* Quit when there are no more completions */ if (!msg) break; - if (msg->msg == CURLMSG_NONE) - break; - switch (msg->msg) { - case CURLMSG_DONE: - { - CURLState *state = NULL; - curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, - (char **)&state); + if (msg->msg == CURLMSG_DONE) { + CURLState *state = NULL; + curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, + (char **)&state); - /* ACBs for successful messages get completed in curl_read_cb */ - if (msg->data.result != CURLE_OK) { - int i; - for (i = 0; i < CURL_NUM_ACB; i++) { - CURLAIOCB *acb = state->acb[i]; + /* ACBs for successful messages get completed in curl_read_cb */ + if (msg->data.result != CURLE_OK) { + int i; + for (i = 0; i < CURL_NUM_ACB; i++) { + CURLAIOCB *acb = state->acb[i]; - if (acb == NULL) { - continue; - } - - acb->common.cb(acb->common.opaque, -EIO); - qemu_aio_release(acb); - state->acb[i] = NULL; + if (acb == NULL) { + continue; } - } - curl_clean_state(state); - break; + acb->common.cb(acb->common.opaque, -EIO); + qemu_aio_release(acb); + state->acb[i] = NULL; + } } - default: - msgs_in_queue = 0; - break; + + curl_clean_state(state); + break; } - } while(msgs_in_queue); + } } static void curl_multi_do(void *arg) From b7079df4100069959f4e9d90d5cb5ba7d4ebbf1a Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Tue, 29 Apr 2014 16:03:32 +0100 Subject: [PATCH 31/31] curl: Fix hang reading from slow connections When receiving a new aio read request, we first look for an existing transaction whose range will cover the read request by the time it completes. However, we weren't checking that the existing transaction was still active. If it had timed out, we were adding the request to a transaction which would never complete and had already been cancelled, resulting in a hang. Signed-off-by: Matthew Booth Tested-by: Richard W.M. Jones Signed-off-by: Kevin Wolf --- block/curl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/curl.c b/block/curl.c index 16e7db8eea..d2f1084b91 100644 --- a/block/curl.c +++ b/block/curl.c @@ -220,7 +220,8 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, } // Wait for unfinished chunks - if ((start >= state->buf_start) && + if (state->in_use && + (start >= state->buf_start) && (start <= buf_fend) && (end >= state->buf_start) && (end <= buf_fend))