From 709e57753b57cbef674e88e85056612d3e1727c2 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 15 Dec 2014 13:07:17 +0800 Subject: [PATCH 01/38] qemu-iotests: Remove 091 from quick group For the purpose of allowing running quick group on tmpfs. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/group | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index a4742c6d01..08099b9e1d 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -97,7 +97,7 @@ 088 rw auto quick 089 rw auto quick 090 rw auto quick -091 rw auto quick +091 rw auto 092 rw auto quick 095 rw auto quick 097 rw auto backing From b8aff7d6bfa1632d722f47a1a6bca679221b6db3 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 15 Dec 2014 13:07:18 +0800 Subject: [PATCH 02/38] qemu-iotests: Speed up make check-block Using /tmp, which is usually mounted as tmpfs, the quick group can be quicker. On my laptop (Lenovo T430s with Fedora 20), this reduces the time from 50s to 30s. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests-quick.sh | 2 +- tests/qemu-iotests/check | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh index 12af731c68..0e554bb972 100755 --- a/tests/qemu-iotests-quick.sh +++ b/tests/qemu-iotests-quick.sh @@ -3,6 +3,6 @@ cd tests/qemu-iotests ret=0 -./check -T -qcow2 -g quick || ret=1 +TEST_DIR=${TEST_DIR:-/tmp/qemu-iotests-quick-$$} ./check -T -qcow2 -g quick || ret=1 exit $ret diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 8ca40116d7..baeae80f96 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -238,6 +238,7 @@ QEMU_NBD -- $QEMU_NBD IMGFMT -- $FULL_IMGFMT_DETAILS IMGPROTO -- $FULL_IMGPROTO_DETAILS PLATFORM -- $FULL_HOST_DETAILS +TEST_DIR -- $TEST_DIR SOCKET_SCM_HELPER -- $SOCKET_SCM_HELPER EOF From fcf5def1ab0f94ff120b7141c943b517d2ece83d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 17 Dec 2014 16:09:58 +0100 Subject: [PATCH 03/38] block: mark AioContext as recursive AioContext can be accessed recursively, in fact that's what we do with aio_poll. Marking the GSource as recursive avoids that GLib blocks it and unblocks it around every call to aio_dispatch, which is a pretty expensive operation. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- async.c | 1 + 1 file changed, 1 insertion(+) diff --git a/async.c b/async.c index 3939b795e5..572f239e2c 100644 --- a/async.c +++ b/async.c @@ -300,6 +300,7 @@ AioContext *aio_context_new(Error **errp) error_setg_errno(errp, -ret, "Failed to initialize event notifier"); return NULL; } + g_source_set_can_recurse(&ctx->source, true); aio_set_event_notifier(ctx, &ctx->notifier, (EventNotifierHandler *) event_notifier_test_and_clear); From e012b78cf5bc42f20ef1a1f78383035f2293ceea Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 17 Dec 2014 16:09:59 +0100 Subject: [PATCH 04/38] block: do not allocate an iovec per read of a growable/zero_after_eof BDS Most reads do not go past the end of the file, and they can use the input QEMUIOVector instead of creating one. This removes the qemu_iovec_* functions from the profile. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/block.c b/block.c index 4165d4265c..58f804228c 100644 --- a/block.c +++ b/block.c @@ -3034,18 +3034,16 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), align >> BDRV_SECTOR_BITS); - if (max_nb_sectors > 0) { + if (nb_sectors < max_nb_sectors) { + ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + } else if (max_nb_sectors > 0) { QEMUIOVector local_qiov; - size_t local_sectors; - - max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS); - local_sectors = MIN(max_nb_sectors, nb_sectors); qemu_iovec_init(&local_qiov, qiov->niov); qemu_iovec_concat(&local_qiov, qiov, 0, - local_sectors * BDRV_SECTOR_SIZE); + max_nb_sectors * BDRV_SECTOR_SIZE); - ret = drv->bdrv_co_readv(bs, sector_num, local_sectors, + ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors, &local_qiov); qemu_iovec_destroy(&local_qiov); From ee82310f8a6cc9ba9e8d1ac892f1a3cfc0debd8c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 17 Dec 2014 16:10:00 +0100 Subject: [PATCH 05/38] block: replace g_new0 with g_new for bottom half allocation. This saves about 15% of the clock cycles spent on allocation. Using the slice allocator does not add a visible improvement; allocation is faster than malloc, while freeing seems to be slower. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- async.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/async.c b/async.c index 572f239e2c..2be88cc9e9 100644 --- a/async.c +++ b/async.c @@ -44,10 +44,12 @@ struct QEMUBH { QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) { QEMUBH *bh; - bh = g_new0(QEMUBH, 1); - bh->ctx = ctx; - bh->cb = cb; - bh->opaque = opaque; + bh = g_new(QEMUBH, 1); + *bh = (QEMUBH){ + .ctx = ctx, + .cb = cb, + .opaque = opaque, + }; qemu_mutex_lock(&ctx->bh_lock); bh->next = ctx->first_bh; /* Make sure that the members are ready before putting bh into list */ From a97ceca578e69f9b050a2400e1979a068b0654eb Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 26 Nov 2014 17:20:24 +0100 Subject: [PATCH 06/38] checkpatch: Brace handling on multi-line condition CODING_STYLE states the following about braces around blocks: > The opening brace is on the line that contains the control flow > statement that introduces the new block; [...] This is obviously impossible with multi-line conditions. Therefore, CODING_STYLE does not make any clear statement about where to put the opening brace after a multi-line condition. There is a reason to prefer to place the opening brace on an own line after such a condition while still placing it on the same line as the "control flow statement" if possible; that reason is that the last line of a multi-line condition is indented, in the case of "if", it is often indented by four spaces, just as much as the first statement in the block will be indented. This is hard to read as there is no clearly visible distinction between condition and block. Placing the opening brace on a separate line solves this issue. Also, there are cases where placing the opening brace on a separate line is the only viable option; if the previous line had nearly 80 characters and splitting it is not desirable, the opening brace is naturally placed on an own line. This patch fixes checkpatch.pl to not complain about braces on own lines if the condition introducing the block spanned more than one line, or if the previous line had 79 or 80 characters. Furthermore, the warning about not having braces around a block is fixed to mind braces not being on the last line of the condition. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- scripts/checkpatch.pl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 053e4320fc..5df61f9aa9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1639,7 +1639,13 @@ sub process { #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n"; #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n"; - if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) { + # The length of the "previous line" is checked against 80 because it + # includes the + at the beginning of the line (if the actual line has + # 79 or 80 characters, it is no longer possible to add a space and an + # opening brace there) + if ($#ctx == 0 && $ctx !~ /{\s*/ && + defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/ && + defined($lines[$ctx_ln - 2]) && length($lines[$ctx_ln - 2]) < 80) { ERROR("that open brace { should be on the previous line\n" . "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); } @@ -2542,7 +2548,10 @@ sub process { substr($block, 0, length($cond), ''); - $seen++ if ($block =~ /^\s*{/); + my $spaced_block = $block; + $spaced_block =~ s/\n\+/ /g; + + $seen++ if ($spaced_block =~ /^\s*{/); print "APW: cond<$cond> block<$block> allowed<$allowed>\n" if $dbg_adv_apw; From 0a82855a1a819f3de49781d42728f485fbd64284 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 26 Nov 2014 17:20:25 +0100 Subject: [PATCH 07/38] block: Get full backing filename from string Introduce bdrv_get_full_backing_filename_from_filename(), a function which takes the name of the backed file and a potentially relative backing filename to produce the full (absolute) backing filename. Use this function from bdrv_get_full_backing_filename(). Signed-off-by: Max Reitz Reviewed-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block.c | 18 +++++++++++++----- include/block/block.h | 3 +++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index 58f804228c..86f2faa266 100644 --- a/block.c +++ b/block.c @@ -303,13 +303,21 @@ void path_combine(char *dest, int dest_size, } } +void bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + char *dest, size_t sz) +{ + if (backing[0] == '\0' || path_has_protocol(backing)) { + pstrcpy(dest, sz, backing); + } else { + path_combine(dest, sz, backed, backing); + } +} + void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz) { - if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) { - pstrcpy(dest, sz, bs->backing_file); - } else { - path_combine(dest, sz, bs->filename, bs->backing_file); - } + bdrv_get_full_backing_filename_from_filename(bs->filename, bs->backing_file, + dest, sz); } void bdrv_register(BlockDriver *bdrv) diff --git a/include/block/block.h b/include/block/block.h index 6e7275d95b..eac9bb0957 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -397,6 +397,9 @@ void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size); void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz); +void bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + char *dest, size_t sz); int bdrv_is_snapshot(BlockDriverState *bs); int path_has_protocol(const char *path); From 9f07429e8873124c588847b0d499cb32b9410bdd Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 26 Nov 2014 17:20:26 +0100 Subject: [PATCH 08/38] block: JSON filenames and relative backing files When using a relative backing file name, qemu needs to know the directory of the top image file. For JSON filenames, such a directory cannot be easily determined (e.g. how do you determine the directory of a qcow2 BDS directly on top of a quorum BDS?). Therefore, do not allow relative filenames for the backing file of BDSs only having a JSON filename. Furthermore, BDS::exact_filename should be used whenever possible. If BDS::filename is not equal to BDS::exact_filename, the former will always be a JSON object. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block.c | 28 ++++++++++++++++++++++------ block/qapi.c | 7 ++++++- include/block/block.h | 5 +++-- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/block.c b/block.c index 86f2faa266..cf0867c9d7 100644 --- a/block.c +++ b/block.c @@ -305,19 +305,28 @@ void path_combine(char *dest, int dest_size, void bdrv_get_full_backing_filename_from_filename(const char *backed, const char *backing, - char *dest, size_t sz) + char *dest, size_t sz, + Error **errp) { - if (backing[0] == '\0' || path_has_protocol(backing)) { + if (backing[0] == '\0' || path_has_protocol(backing) || + path_is_absolute(backing)) + { pstrcpy(dest, sz, backing); + } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { + error_setg(errp, "Cannot use relative backing file names for '%s'", + backed); } else { path_combine(dest, sz, backed, backing); } } -void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz) +void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, + Error **errp) { - bdrv_get_full_backing_filename_from_filename(bs->filename, bs->backing_file, - dest, sz); + char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; + + bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, + dest, sz, errp); } void bdrv_register(BlockDriver *bdrv) @@ -1225,7 +1234,14 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) QDECREF(options); goto free_exit; } else { - bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX); + bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, + &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + QDECREF(options); + goto free_exit; + } } if (!bs->drv || !bs->drv->supports_backing) { diff --git a/block/qapi.c b/block/qapi.c index fa68ba731f..a6fd6f7ab2 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -214,7 +214,12 @@ void bdrv_query_image_info(BlockDriverState *bs, info->backing_filename = g_strdup(backing_filename); info->has_backing_filename = true; bdrv_get_full_backing_filename(bs, backing_filename2, - sizeof(backing_filename2)); + sizeof(backing_filename2), &err); + if (err) { + error_propagate(errp, err); + qapi_free_ImageInfo(info); + return; + } if (strcmp(backing_filename, backing_filename2) != 0) { info->full_backing_filename = diff --git a/include/block/block.h b/include/block/block.h index eac9bb0957..9efaa80f91 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -396,10 +396,11 @@ const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size); void bdrv_get_full_backing_filename(BlockDriverState *bs, - char *dest, size_t sz); + char *dest, size_t sz, Error **errp); void bdrv_get_full_backing_filename_from_filename(const char *backed, const char *backing, - char *dest, size_t sz); + char *dest, size_t sz, + Error **errp); int bdrv_is_snapshot(BlockDriverState *bs); int path_has_protocol(const char *path); From 291680186f7f8856d943793414d1b8df6c562fc4 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 26 Nov 2014 17:20:27 +0100 Subject: [PATCH 09/38] block: Relative backing file for image creation Relative backing filenames are always relative to the backed image's directory; the same applies to image creation. Therefore, if the backing file has to be opened for determining its size (in case the size has not been explicitly specified) its filename should be interpreted relative to the new image's base directory and not relative to qemu's working directory. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index cf0867c9d7..be75142446 100644 --- a/block.c +++ b/block.c @@ -5659,16 +5659,26 @@ void bdrv_img_create(const char *filename, const char *fmt, if (size == -1) { if (backing_file) { BlockDriverState *bs; + char *full_backing = g_new0(char, PATH_MAX); int64_t size; int back_flags; + bdrv_get_full_backing_filename_from_filename(filename, backing_file, + full_backing, PATH_MAX, + &local_err); + if (local_err) { + g_free(full_backing); + goto out; + } + /* backing files always opened read-only */ back_flags = flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); bs = NULL; - ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags, + ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, backing_drv, &local_err); + g_free(full_backing); if (ret < 0) { goto out; } From 1085daf9411e355ce3bd6d7034e23a8405599889 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 26 Nov 2014 17:20:28 +0100 Subject: [PATCH 10/38] block/vmdk: Relative backing file for creation When a vmdk image is created with a backing file, it is opened to check whether it is indeed a vmdk file by letting qemu probe it. When doing so, the backing filename is relative to the image's base directory so it should be interpreted accordingly. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/vmdk.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/block/vmdk.c b/block/vmdk.c index bfff900ba6..52cb8888e5 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1891,8 +1891,19 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) } if (backing_file) { BlockDriverState *bs = NULL; - ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_NO_BACKING, NULL, + char *full_backing = g_new0(char, PATH_MAX); + bdrv_get_full_backing_filename_from_filename(filename, backing_file, + full_backing, PATH_MAX, + &local_err); + if (local_err) { + g_free(full_backing); + error_propagate(errp, local_err); + ret = -ENOENT; + goto exit; + } + ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, NULL, errp); + g_free(full_backing); if (ret != 0) { goto exit; } From 527ab22a2a268421564cfba3409d081aee22c22b Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 26 Nov 2014 17:20:29 +0100 Subject: [PATCH 11/38] iotests: Add test for relative backing file names Sometimes, qemu does not have a filename to work with, so it does not know which directory to use for a backing file specified by a relative filename. Add a test which tests that qemu exits with an appropriate error message. Additionally, add a test for qemu-img create with a backing filename relative to the backed image's base directory while omitting the image size. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/110 | 94 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/110.out | 19 ++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 114 insertions(+) create mode 100755 tests/qemu-iotests/110 create mode 100644 tests/qemu-iotests/110.out diff --git a/tests/qemu-iotests/110 b/tests/qemu-iotests/110 new file mode 100755 index 0000000000..a687f9567d --- /dev/null +++ b/tests/qemu-iotests/110 @@ -0,0 +1,94 @@ +#!/bin/bash +# +# Test case for relative backing file names in complex BDS trees +# +# Copyright (C) 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +# Any format supporting backing files +_supported_fmt qed qcow qcow2 vmdk +_supported_proto file +_supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" + +TEST_IMG_REL=$(basename "$TEST_IMG") + +echo +echo '=== Reconstructable filename ===' +echo + +TEST_IMG="$TEST_IMG.base" _make_test_img 64M +_make_test_img -b "$TEST_IMG_REL.base" 64M +# qemu should be able to reconstruct the filename, so relative backing names +# should work +TEST_IMG="json:{'driver':'$IMGFMT','file':{'driver':'file','filename':'$TEST_IMG'}}" \ + _img_info | _filter_img_info + +echo +echo '=== Non-reconstructable filename ===' +echo + +# Across blkdebug without a config file, you cannot reconstruct filenames, so +# qemu is incapable of knowing the directory of the top image +TEST_IMG="json:{ + 'driver': '$IMGFMT', + 'file': { + 'driver': 'blkdebug', + 'image': { + 'driver': 'file', + 'filename': '$TEST_IMG' + }, + 'set-state': [ + { + 'event': 'read_aio', + 'new_state': 42 + } + ] + } +}" _img_info | _filter_img_info + +echo +echo '=== Backing name is always relative to the backed image ===' +echo + +# omit the image size; it should work anyway +_make_test_img -b "$TEST_IMG_REL.base" + + +# success, all done +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/110.out b/tests/qemu-iotests/110.out new file mode 100644 index 0000000000..152bacf41e --- /dev/null +++ b/tests/qemu-iotests/110.out @@ -0,0 +1,19 @@ +QA output created by 110 + +=== Reconstructable filename === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='t.IMGFMT.base' +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 64M (67108864 bytes) +backing file: t.IMGFMT.base (actual path: TEST_DIR/t.IMGFMT.base) + +=== Non-reconstructable filename === + +qemu-img: Cannot use relative backing file names for 'json:{"driver": "IMGFMT", "file": {"set-state.0.event": "read_aio", "image": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT"}, "driver": "blkdebug", "set-state.0.new_state": 42}}' + +=== Backing name is always relative to the backed image === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='t.IMGFMT.base' +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 08099b9e1d..f8bf354156 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -112,6 +112,7 @@ 107 rw auto quick 108 rw auto quick 109 rw auto +110 rw auto backing quick 111 rw auto quick 113 rw auto quick 114 rw auto quick From a06e43556e3faea22de4cce0da7a6f362d3ca9a6 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 16 Dec 2014 09:40:24 +0800 Subject: [PATCH 12/38] qapi: Fix document for BlockStats.node-name Signed-off-by: Fam Zheng Reviewed-by: Eric Blake Message-id: 1418694024-26498-1-git-send-email-famz@redhat.com Signed-off-by: Max Reitz --- qapi/block-core.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index 6e8db15861..2d8bd253a7 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -423,7 +423,7 @@ # @device: #optional If the stats are for a virtual block device, the name # corresponding to the virtual block device. # -# @device: #optional The node name of the device. (Since 2.3) +# @node-name: #optional The node name of the device. (Since 2.3) # # @stats: A @BlockDeviceStats for the device. # From c4237dfa635900e4d1cdc6038d5efe3507f45f0c Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 27 Nov 2014 12:40:46 +0300 Subject: [PATCH 13/38] block: fix spoiling all dirty bitmaps by mirror and migration Mirror and migration use dirty bitmaps for their purposes, and since commit [block: per caller dirty bitmap] they use their own bitmaps, not the global one. But they use old functions bdrv_set_dirty and bdrv_reset_dirty, which change all dirty bitmaps. Named dirty bitmaps series by Fam and Snow are affected: mirroring and migration will spoil all (not related to this mirroring or migration) named dirty bitmaps. This patch fixes this by adding bdrv_set_dirty_bitmap and bdrv_reset_dirty_bitmap, which change concrete bitmap. Also, to prevent such mistakes in future, old functions bdrv_(set,reset)_dirty are made static, for internal block usage. Signed-off-by: Vladimir Sementsov-Ogievskiy CC: John Snow CC: Fam Zheng CC: Denis V. Lunev CC: Stefan Hajnoczi CC: Kevin Wolf Reviewed-by: John Snow Reviewed-by: Fam Zheng Message-id: 1417081246-3593-1-git-send-email-vsementsov@parallels.com Signed-off-by: Max Reitz --- block.c | 23 ++++++++++++++++++++--- block/mirror.c | 11 +++++++---- include/block/block.h | 6 ++++-- migration/block.c | 5 +++-- 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/block.c b/block.c index be75142446..e76a223eae 100644 --- a/block.c +++ b/block.c @@ -97,6 +97,10 @@ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); +static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors); +static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors); /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -5411,8 +5415,20 @@ void bdrv_dirty_iter_init(BlockDriverState *bs, hbitmap_iter_init(hbi, bitmap->bitmap, 0); } -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) +void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); +} + +void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); +} + +static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) { BdrvDirtyBitmap *bitmap; QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { @@ -5420,7 +5436,8 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, } } -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) +static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) { BdrvDirtyBitmap *bitmap; QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { diff --git a/block/mirror.c b/block/mirror.c index 2c6dd2a4c1..9019d1ba56 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -128,7 +128,8 @@ static void mirror_write_complete(void *opaque, int ret) BlockDriverState *source = s->common.bs; BlockErrorAction action; - bdrv_set_dirty(source, op->sector_num, op->nb_sectors); + bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num, + op->nb_sectors); action = mirror_error_action(s, false, -ret); if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { s->ret = ret; @@ -145,7 +146,8 @@ static void mirror_read_complete(void *opaque, int ret) BlockDriverState *source = s->common.bs; BlockErrorAction action; - bdrv_set_dirty(source, op->sector_num, op->nb_sectors); + bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num, + op->nb_sectors); action = mirror_error_action(s, true, -ret); if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { s->ret = ret; @@ -286,7 +288,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) next_sector += sectors_per_chunk; } - bdrv_reset_dirty(source, sector_num, nb_sectors); + bdrv_reset_dirty_bitmap(source, s->dirty_bitmap, sector_num, + nb_sectors); /* Copy the dirty cluster. */ s->in_flight++; @@ -442,7 +445,7 @@ static void coroutine_fn mirror_run(void *opaque) assert(n > 0); if (ret == 1) { - bdrv_set_dirty(bs, sector_num, n); + bdrv_set_dirty_bitmap(bs, s->dirty_bitmap, sector_num, n); sector_num = next; } else { sector_num += n; diff --git a/include/block/block.h b/include/block/block.h index 9efaa80f91..760e78b71d 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -438,8 +438,10 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity, void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); +void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); void bdrv_dirty_iter_init(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); diff --git a/migration/block.c b/migration/block.c index 74d9eb125c..a0f908cf98 100644 --- a/migration/block.c +++ b/migration/block.c @@ -303,7 +303,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); - bdrv_reset_dirty(bs, cur_sector, nr_sectors); + bdrv_reset_dirty_bitmap(bs, bmds->dirty_bitmap, cur_sector, nr_sectors); qemu_mutex_unlock_iothread(); bmds->cur_sector = cur_sector + nr_sectors; @@ -496,7 +496,8 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, g_free(blk); } - bdrv_reset_dirty(bmds->bs, sector, nr_sectors); + bdrv_reset_dirty_bitmap(bmds->bs, bmds->dirty_bitmap, sector, + nr_sectors); break; } sector += BDRV_SECTORS_PER_DIRTY_CHUNK; From b7b9d39a7a5bd3a7fe5968b7780c9868c3210a4d Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 18 Dec 2014 18:37:04 +0800 Subject: [PATCH 14/38] qapi: Comment version info in TransactionAction Signed-off-by: Fam Zheng Reviewed-by: Eric Blake Reviewed-by: Max Reitz Reviewed-by: Markus Armbruster Message-id: 1418899027-8445-2-git-send-email-famz@redhat.com Signed-off-by: Max Reitz --- qapi-schema.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/qapi-schema.json b/qapi-schema.json index 563b4ad98a..47d99cf9e7 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1254,6 +1254,12 @@ # # A discriminated record of operations that can be performed with # @transaction. +# +# Since 1.1 +# +# drive-backup since 1.6 +# abort since 1.6 +# blockdev-snapshot-internal-sync since 1.7 ## { 'union': 'TransactionAction', 'data': { From c29c1dd312f39ec18a3c6177c6da09a75e095d70 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 18 Dec 2014 18:37:05 +0800 Subject: [PATCH 15/38] qmp: Add command 'blockdev-backup' Similar to drive-backup, but this command uses a device id as target instead of creating/opening an image file. Also add blocker on target bs, since the target is also a named device now. Add check and report error for bs == target which became possible but is an illegal case with introduction of blockdev-backup. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Reviewed-by: John Snow Reviewed-by: Markus Armbruster Message-id: 1418899027-8445-3-git-send-email-famz@redhat.com Signed-off-by: Max Reitz --- block/backup.c | 28 +++++++++++++++++++++++ blockdev.c | 54 ++++++++++++++++++++++++++++++++++++++++++++ qapi/block-core.json | 54 ++++++++++++++++++++++++++++++++++++++++++++ qmp-commands.hx | 42 ++++++++++++++++++++++++++++++++++ 4 files changed, 178 insertions(+) diff --git a/block/backup.c b/block/backup.c index 792e65514b..1c535b1ab9 100644 --- a/block/backup.c +++ b/block/backup.c @@ -360,6 +360,7 @@ static void coroutine_fn backup_run(void *opaque) hbitmap_free(job->bitmap); bdrv_iostatus_disable(target); + bdrv_op_unblock_all(target, job->common.blocker); data = g_malloc(sizeof(*data)); data->ret = ret; @@ -379,6 +380,11 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, assert(target); assert(cb); + if (bs == target) { + error_setg(errp, "Source and target cannot be the same"); + return; + } + if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && !bdrv_iostatus_is_enabled(bs)) { @@ -386,6 +392,26 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } + if (!bdrv_is_inserted(bs)) { + error_setg(errp, "Device is not inserted: %s", + bdrv_get_device_name(bs)); + return; + } + + if (!bdrv_is_inserted(target)) { + error_setg(errp, "Device is not inserted: %s", + bdrv_get_device_name(target)); + return; + } + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { + return; + } + + if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { + return; + } + len = bdrv_getlength(bs); if (len < 0) { error_setg_errno(errp, -len, "unable to get length for '%s'", @@ -399,6 +425,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } + bdrv_op_block_all(target, job->common.blocker); + job->on_source_error = on_source_error; job->on_target_error = on_target_error; job->target = target; diff --git a/blockdev.c b/blockdev.c index 5651a8e140..d6ccc5ebca 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2240,6 +2240,8 @@ void qmp_drive_backup(const char *device, const char *target, aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); + /* Although backup_run has this check too, we need to use bs->drv below, so + * do an early check redundantly. */ if (!bdrv_is_inserted(bs)) { error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); goto out; @@ -2256,6 +2258,7 @@ void qmp_drive_backup(const char *device, const char *target, } } + /* Early check to avoid creating target */ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { goto out; } @@ -2323,6 +2326,57 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) return bdrv_named_nodes_list(); } +void qmp_blockdev_backup(const char *device, const char *target, + enum MirrorSyncMode sync, + bool has_speed, int64_t speed, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + Error **errp) +{ + BlockDriverState *bs; + BlockDriverState *target_bs; + Error *local_err = NULL; + AioContext *aio_context; + + if (!has_speed) { + speed = 0; + } + if (!has_on_source_error) { + on_source_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_on_target_error) { + on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } + + bs = bdrv_find(device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + target_bs = bdrv_find(target); + if (!target_bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, target); + goto out; + } + + bdrv_ref(target_bs); + bdrv_set_aio_context(target_bs, aio_context); + backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, + block_job_cb, bs, &local_err); + if (local_err != NULL) { + bdrv_unref(target_bs); + error_propagate(errp, local_err); + } +out: + aio_context_release(aio_context); +} + #define DEFAULT_MIRROR_BUF_SIZE (10 << 20) void qmp_drive_mirror(const char *device, const char *target, diff --git a/qapi/block-core.json b/qapi/block-core.json index 2d8bd253a7..80984d1660 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -702,6 +702,41 @@ '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError' } } +## +# @BlockdevBackup +# +# @device: the name of the device which should be copied. +# +# @target: the name of the backup target device. +# +# @sync: what parts of the disk image should be copied to the destination +# (all the disk, only the sectors allocated in the topmost image, or +# only new I/O). +# +# @speed: #optional the maximum speed, in bytes per second. The default is 0, +# for unlimited. +# +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block device supports io-status (see BlockInfo). +# +# @on-target-error: #optional the action to take on an error on the target, +# default 'report' (no limitations, since this applies to +# a different block device than @device). +# +# Note that @on-source-error and @on-target-error only affect background I/O. +# If an error occurs during a guest write request, the device's rerror/werror +# actions will be used. +# +# Since: 2.3 +## +{ 'type': 'BlockdevBackup', + 'data': { 'device': 'str', 'target': 'str', + 'sync': 'MirrorSyncMode', + '*speed': 'int', + '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError' } } + ## # @blockdev-snapshot-sync # @@ -821,6 +856,25 @@ ## { 'command': 'drive-backup', 'data': 'DriveBackup' } +## +# @blockdev-backup +# +# Start a point-in-time copy of a block device to a new destination. The +# status of ongoing blockdev-backup operations can be checked with +# query-block-jobs where the BlockJobInfo.type field has the value 'backup'. +# The operation can be stopped before it has completed using the +# block-job-cancel command. +# +# For the arguments, see the documentation of BlockdevBackup. +# +# Returns: Nothing on success. +# If @device or @target is not a valid block device, DeviceNotFound. +# +# Since 2.3 +## +{ 'command': 'blockdev-backup', 'data': 'BlockdevBackup' } + + ## # @query-named-block-nodes # diff --git a/qmp-commands.hx b/qmp-commands.hx index 6945d30198..8957201f73 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1094,6 +1094,48 @@ Example: "sync": "full", "target": "backup.img" } } <- { "return": {} } + +EQMP + + { + .name = "blockdev-backup", + .args_type = "sync:s,device:B,target:B,speed:i?," + "on-source-error:s?,on-target-error:s?", + .mhandler.cmd_new = qmp_marshal_input_blockdev_backup, + }, + +SQMP +blockdev-backup +--------------- + +The device version of drive-backup: this command takes an existing named device +as backup target. + +Arguments: + +- "device": the name of the device which should be copied. + (json-string) +- "target": the name of the backup target device. (json-string) +- "sync": what parts of the disk image should be copied to the destination; + possibilities include "full" for all the disk, "top" for only the + sectors allocated in the topmost image, or "none" to only replicate + new I/O (MirrorSyncMode). +- "speed": the maximum speed, in bytes per second (json-int, optional) +- "on-source-error": the action to take on an error on the source, default + 'report'. 'stop' and 'enospc' can only be used + if the block device supports io-status. + (BlockdevOnError, optional) +- "on-target-error": the action to take on an error on the target, default + 'report' (no limitations, since this applies to + a different block device than device). + (BlockdevOnError, optional) + +Example: +-> { "execute": "blockdev-backup", "arguments": { "device": "src-id", + "sync": "full", + "target": "tgt-id" } } +<- { "return": {} } + EQMP { From bd8baecddc3c45f1b3b8322fbf41a3c4b01ec1b7 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 18 Dec 2014 18:37:06 +0800 Subject: [PATCH 16/38] block: Add blockdev-backup to transaction Signed-off-by: Fam Zheng Reviewed-by: John Snow Reviewed-by: Max Reitz Reviewed-by: Markus Armbruster Message-id: 1418899027-8445-4-git-send-email-famz@redhat.com Signed-off-by: Max Reitz --- blockdev.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++ qapi-schema.json | 2 ++ 2 files changed, 81 insertions(+) diff --git a/blockdev.c b/blockdev.c index d6ccc5ebca..f2b3b25c10 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1559,6 +1559,79 @@ static void drive_backup_clean(BlkTransactionState *common) } } +typedef struct BlockdevBackupState { + BlkTransactionState common; + BlockDriverState *bs; + BlockJob *job; + AioContext *aio_context; +} BlockdevBackupState; + +static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackup *backup; + BlockDriverState *bs, *target; + Error *local_err = NULL; + + assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->blockdev_backup; + + bs = bdrv_find(backup->device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, backup->device); + return; + } + + target = bdrv_find(backup->target); + if (!target) { + error_set(errp, QERR_DEVICE_NOT_FOUND, backup->target); + return; + } + + /* AioContext is released in .clean() */ + state->aio_context = bdrv_get_aio_context(bs); + if (state->aio_context != bdrv_get_aio_context(target)) { + state->aio_context = NULL; + error_setg(errp, "Backup between two IO threads is not implemented"); + return; + } + aio_context_acquire(state->aio_context); + + qmp_blockdev_backup(backup->device, backup->target, + backup->sync, + backup->has_speed, backup->speed, + backup->has_on_source_error, backup->on_source_error, + backup->has_on_target_error, backup->on_target_error, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + state->bs = bs; + state->job = state->bs->job; +} + +static void blockdev_backup_abort(BlkTransactionState *common) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockDriverState *bs = state->bs; + + /* Only cancel if it's the job we started */ + if (bs && bs->job && bs->job == state->job) { + block_job_cancel_sync(bs->job); + } +} + +static void blockdev_backup_clean(BlkTransactionState *common) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + + if (state->aio_context) { + aio_context_release(state->aio_context); + } +} + static void abort_prepare(BlkTransactionState *common, Error **errp) { error_setg(errp, "Transaction aborted using Abort action"); @@ -1582,6 +1655,12 @@ static const BdrvActionOps actions[] = { .abort = drive_backup_abort, .clean = drive_backup_clean, }, + [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = { + .instance_size = sizeof(BlockdevBackupState), + .prepare = blockdev_backup_prepare, + .abort = blockdev_backup_abort, + .clean = blockdev_backup_clean, + }, [TRANSACTION_ACTION_KIND_ABORT] = { .instance_size = sizeof(BlkTransactionState), .prepare = abort_prepare, diff --git a/qapi-schema.json b/qapi-schema.json index 47d99cf9e7..fbfc52f94d 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1260,11 +1260,13 @@ # drive-backup since 1.6 # abort since 1.6 # blockdev-snapshot-internal-sync since 1.7 +# blockdev-backup since 2.3 ## { 'union': 'TransactionAction', 'data': { 'blockdev-snapshot-sync': 'BlockdevSnapshot', 'drive-backup': 'DriveBackup', + 'blockdev-backup': 'BlockdevBackup', 'abort': 'Abort', 'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal' } } From 7c6a4ab871cae829ea754c717db2d40c2c115224 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 18 Dec 2014 18:37:07 +0800 Subject: [PATCH 17/38] qemu-iotests: Test blockdev-backup in 055 This applies cases on drive-backup on blockdev-backup, except cases with target format and mode. Also add a case to check source == target. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Reviewed-by: Markus Armbruster Message-id: 1418899027-8445-5-git-send-email-famz@redhat.com Signed-off-by: Max Reitz --- tests/qemu-iotests/055 | 211 ++++++++++++++++++++++++++++++------- tests/qemu-iotests/055.out | 4 +- 2 files changed, 176 insertions(+), 39 deletions(-) diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index 0872444811..e81d4d0d83 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -1,8 +1,8 @@ #!/usr/bin/env python # -# Tests for drive-backup +# Tests for drive-backup and blockdev-backup # -# Copyright (C) 2013 Red Hat, Inc. +# Copyright (C) 2013, 2014 Red Hat, Inc. # # Based on 041. # @@ -27,6 +27,7 @@ from iotests import qemu_img, qemu_io test_img = os.path.join(iotests.test_dir, 'test.img') target_img = os.path.join(iotests.test_dir, 'target.img') +blockdev_target_img = os.path.join(iotests.test_dir, 'blockdev-target.img') class TestSingleDrive(iotests.QMPTestCase): image_len = 64 * 1024 * 1024 # MB @@ -38,34 +39,41 @@ class TestSingleDrive(iotests.QMPTestCase): qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 67043328 64k', test_img) + qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) - self.vm = iotests.VM().add_drive(test_img) + self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) self.vm.launch() def tearDown(self): self.vm.shutdown() os.remove(test_img) + os.remove(blockdev_target_img) try: os.remove(target_img) except OSError: pass - def test_cancel(self): + def do_test_cancel(self, cmd, target): self.assert_no_active_block_jobs() - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full') + result = self.vm.qmp(cmd, device='drive0', target=target, sync='full') self.assert_qmp(result, 'return', {}) event = self.cancel_and_wait() self.assert_qmp(event, 'data/type', 'backup') - def test_pause(self): + def test_cancel_drive_backup(self): + self.do_test_cancel('drive-backup', target_img) + + def test_cancel_blockdev_backup(self): + self.do_test_cancel('blockdev-backup', 'drive1') + + def do_test_pause(self, cmd, target, image): self.assert_no_active_block_jobs() self.vm.pause_drive('drive0') - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full') + result = self.vm.qmp(cmd, device='drive0', + target=target, sync='full') self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-job-pause', device='drive0') @@ -86,14 +94,25 @@ class TestSingleDrive(iotests.QMPTestCase): self.wait_until_completed() self.vm.shutdown() - self.assertTrue(iotests.compare_images(test_img, target_img), + self.assertTrue(iotests.compare_images(test_img, image), 'target image does not match source after backup') + def test_pause_drive_backup(self): + self.do_test_pause('drive-backup', target_img, target_img) + + def test_pause_blockdev_backup(self): + self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img) + def test_medium_not_found(self): result = self.vm.qmp('drive-backup', device='ide1-cd0', target=target_img, sync='full') self.assert_qmp(result, 'error/class', 'GenericError') + def test_medium_not_found_blockdev_backup(self): + result = self.vm.qmp('blockdev-backup', device='ide1-cd0', + target='drive1', sync='full') + self.assert_qmp(result, 'error/class', 'GenericError') + def test_image_not_found(self): result = self.vm.qmp('drive-backup', device='drive0', target=target_img, sync='full', mode='existing') @@ -105,31 +124,53 @@ class TestSingleDrive(iotests.QMPTestCase): format='spaghetti-noodles') self.assert_qmp(result, 'error/class', 'GenericError') - def test_device_not_found(self): - result = self.vm.qmp('drive-backup', device='nonexistent', - target=target_img, sync='full') + def do_test_device_not_found(self, cmd, **args): + result = self.vm.qmp(cmd, **args) self.assert_qmp(result, 'error/class', 'DeviceNotFound') + def test_device_not_found(self): + self.do_test_device_not_found('drive-backup', device='nonexistent', + target=target_img, sync='full') + + self.do_test_device_not_found('blockdev-backup', device='nonexistent', + target='drive0', sync='full') + + self.do_test_device_not_found('blockdev-backup', device='drive0', + target='nonexistent', sync='full') + + self.do_test_device_not_found('blockdev-backup', device='nonexistent', + target='nonexistent', sync='full') + + def test_target_is_source(self): + result = self.vm.qmp('blockdev-backup', device='drive0', + target='drive0', sync='full') + self.assert_qmp(result, 'error/class', 'GenericError') + class TestSetSpeed(iotests.QMPTestCase): image_len = 80 * 1024 * 1024 # MB def setUp(self): qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSetSpeed.image_len)) qemu_io('-f', iotests.imgfmt, '-c', 'write -P1 0 512', test_img) - self.vm = iotests.VM().add_drive(test_img) + qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) + + self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) self.vm.launch() def tearDown(self): self.vm.shutdown() os.remove(test_img) - os.remove(target_img) + os.remove(blockdev_target_img) + try: + os.remove(target_img) + except OSError: + pass - def test_set_speed(self): + def do_test_set_speed(self, cmd, target): self.assert_no_active_block_jobs() self.vm.pause_drive('drive0') - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full') + result = self.vm.qmp(cmd, device='drive0', target=target, sync='full') self.assert_qmp(result, 'return', {}) # Default speed is 0 @@ -148,10 +189,10 @@ class TestSetSpeed(iotests.QMPTestCase): event = self.cancel_and_wait(resume=True) self.assert_qmp(event, 'data/type', 'backup') - # Check setting speed in drive-backup works + # Check setting speed option works self.vm.pause_drive('drive0') - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full', speed=4*1024*1024) + result = self.vm.qmp(cmd, device='drive0', + target=target, sync='full', speed=4*1024*1024) self.assert_qmp(result, 'return', {}) result = self.vm.qmp('query-block-jobs') @@ -161,18 +202,24 @@ class TestSetSpeed(iotests.QMPTestCase): event = self.cancel_and_wait(resume=True) self.assert_qmp(event, 'data/type', 'backup') - def test_set_speed_invalid(self): + def test_set_speed_drive_backup(self): + self.do_test_set_speed('drive-backup', target_img) + + def test_set_speed_blockdev_backup(self): + self.do_test_set_speed('blockdev-backup', 'drive1') + + def do_test_set_speed_invalid(self, cmd, target): self.assert_no_active_block_jobs() - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full', speed=-1) + result = self.vm.qmp(cmd, device='drive0', + target=target, sync='full', speed=-1) self.assert_qmp(result, 'error/class', 'GenericError') self.assert_no_active_block_jobs() self.vm.pause_drive('drive0') - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full') + result = self.vm.qmp(cmd, device='drive0', + target=target, sync='full') self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-job-set-speed', device='drive0', speed=-1) @@ -181,6 +228,12 @@ class TestSetSpeed(iotests.QMPTestCase): event = self.cancel_and_wait(resume=True) self.assert_qmp(event, 'data/type', 'backup') + def test_set_speed_invalid_drive_backup(self): + self.do_test_set_speed_invalid('drive-backup', target_img) + + def test_set_speed_invalid_blockdev_backup(self): + self.do_test_set_speed_invalid('blockdev-backup', 'drive1') + class TestSingleTransaction(iotests.QMPTestCase): image_len = 64 * 1024 * 1024 # MB @@ -190,41 +243,50 @@ class TestSingleTransaction(iotests.QMPTestCase): qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 67043328 64k', test_img) + qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) - self.vm = iotests.VM().add_drive(test_img) + self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) self.vm.launch() def tearDown(self): self.vm.shutdown() os.remove(test_img) + os.remove(blockdev_target_img) try: os.remove(target_img) except OSError: pass - def test_cancel(self): + def do_test_cancel(self, cmd, target): self.assert_no_active_block_jobs() result = self.vm.qmp('transaction', actions=[{ - 'type': 'drive-backup', + 'type': cmd, 'data': { 'device': 'drive0', - 'target': target_img, + 'target': target, 'sync': 'full' }, } ]) + self.assert_qmp(result, 'return', {}) event = self.cancel_and_wait() self.assert_qmp(event, 'data/type', 'backup') - def test_pause(self): + def test_cancel_drive_backup(self): + self.do_test_cancel('drive-backup', target_img) + + def test_cancel_blockdev_backup(self): + self.do_test_cancel('blockdev-backup', 'drive1') + + def do_test_pause(self, cmd, target, image): self.assert_no_active_block_jobs() self.vm.pause_drive('drive0') result = self.vm.qmp('transaction', actions=[{ - 'type': 'drive-backup', + 'type': cmd, 'data': { 'device': 'drive0', - 'target': target_img, + 'target': target, 'sync': 'full' }, } ]) @@ -248,19 +310,31 @@ class TestSingleTransaction(iotests.QMPTestCase): self.wait_until_completed() self.vm.shutdown() - self.assertTrue(iotests.compare_images(test_img, target_img), + self.assertTrue(iotests.compare_images(test_img, image), 'target image does not match source after backup') - def test_medium_not_found(self): + def test_pause_drive_backup(self): + self.do_test_pause('drive-backup', target_img, target_img) + + def test_pause_blockdev_backup(self): + self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img) + + def do_test_medium_not_found(self, cmd, target): result = self.vm.qmp('transaction', actions=[{ - 'type': 'drive-backup', + 'type': cmd, 'data': { 'device': 'ide1-cd0', - 'target': target_img, + 'target': target, 'sync': 'full' }, } ]) self.assert_qmp(result, 'error/class', 'GenericError') + def test_medium_not_found_drive_backup(self): + self.do_test_medium_not_found('drive-backup', target_img) + + def test_medium_not_found_blockdev_backup(self): + self.do_test_medium_not_found('blockdev-backup', 'drive1') + def test_image_not_found(self): result = self.vm.qmp('transaction', actions=[{ 'type': 'drive-backup', @@ -283,6 +357,43 @@ class TestSingleTransaction(iotests.QMPTestCase): ]) self.assert_qmp(result, 'error/class', 'DeviceNotFound') + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'nonexistent', + 'target': 'drive1', + 'sync': 'full' }, + } + ]) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'drive0', + 'target': 'nonexistent', + 'sync': 'full' }, + } + ]) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'nonexistent', + 'target': 'nonexistent', + 'sync': 'full' }, + } + ]) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + def test_target_is_source(self): + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'drive0', + 'target': 'drive0', + 'sync': 'full' }, + } + ]) + self.assert_qmp(result, 'error/class', 'GenericError') + def test_abort(self): result = self.vm.qmp('transaction', actions=[{ 'type': 'drive-backup', @@ -298,5 +409,31 @@ class TestSingleTransaction(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') self.assert_no_active_block_jobs() + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'nonexistent', + 'target': 'drive1', + 'sync': 'full' }, + }, { + 'type': 'Abort', + 'data': {}, + } + ]) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_no_active_block_jobs() + + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'drive0', + 'target': 'nonexistent', + 'sync': 'full' }, + }, { + 'type': 'Abort', + 'data': {}, + } + ]) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_no_active_block_jobs() + if __name__ == '__main__': iotests.main(supported_fmts=['raw', 'qcow2']) diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out index 6323079e08..42314e9c00 100644 --- a/tests/qemu-iotests/055.out +++ b/tests/qemu-iotests/055.out @@ -1,5 +1,5 @@ -.............. +........................ ---------------------------------------------------------------------- -Ran 14 tests +Ran 24 tests OK From 4dd7b8d30cfa1aebee547958db27efd581a58d9b Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 19 Dec 2014 17:17:06 +0100 Subject: [PATCH 18/38] iotests: Filter out "I/O thread spun..." warning Filter out the "main loop: WARNING: I/O thread spun for..." warning from qemu output (it hardly matters for code specifically testing I/O). Furthermore, use _filter_qemu in all the custom functions which run qemu. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/067 | 3 ++- tests/qemu-iotests/071 | 2 +- tests/qemu-iotests/071.out | 8 ++++---- tests/qemu-iotests/081 | 2 +- tests/qemu-iotests/087 | 3 ++- tests/qemu-iotests/087.out | 1 - tests/qemu-iotests/099 | 2 +- tests/qemu-iotests/common.filter | 1 + 8 files changed, 12 insertions(+), 10 deletions(-) diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067 index 29cd6b5aff..0508c696a8 100755 --- a/tests/qemu-iotests/067 +++ b/tests/qemu-iotests/067 @@ -45,7 +45,8 @@ function do_run_qemu() function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \ + | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' } size=128M diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071 index 5d61ef6d81..9eaa49b419 100755 --- a/tests/qemu-iotests/071 +++ b/tests/qemu-iotests/071 @@ -51,7 +51,7 @@ function do_run_qemu() function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp | _filter_qemu_io } IMG_SIZE=64M diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out index 46484ff69c..9205ce2512 100644 --- a/tests/qemu-iotests/071.out +++ b/tests/qemu-iotests/071.out @@ -52,8 +52,8 @@ read failed: Input/output error {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}} -qemu-system-x86_64: Failed to flush the L2 table cache: Input/output error -qemu-system-x86_64: Failed to flush the refcount block cache: Input/output error +QEMU_PROG: Failed to flush the L2 table cache: Input/output error +QEMU_PROG: Failed to flush the refcount block cache: Input/output error === Testing blkverify on existing block device === @@ -92,7 +92,7 @@ read failed: Input/output error {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}} -qemu-system-x86_64: Failed to flush the L2 table cache: Input/output error -qemu-system-x86_64: Failed to flush the refcount block cache: Input/output error +QEMU_PROG: Failed to flush the L2 table cache: Input/output error +QEMU_PROG: Failed to flush the refcount block cache: Input/output error *** done diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081 index 9ab93ff89e..d9b042cfc7 100755 --- a/tests/qemu-iotests/081 +++ b/tests/qemu-iotests/081 @@ -53,7 +53,7 @@ function do_run_qemu() function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp | _filter_qemu_io } test_quorum=$($QEMU_IMG --help|grep quorum) diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087 index d7454d13da..8694749947 100755 --- a/tests/qemu-iotests/087 +++ b/tests/qemu-iotests/087 @@ -45,7 +45,8 @@ function do_run_qemu() function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \ + | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' } size=128M diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out index 7d38cc26c5..91f4ea1a8b 100644 --- a/tests/qemu-iotests/087.out +++ b/tests/qemu-iotests/087.out @@ -21,7 +21,6 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "Device with id 'disk' already exists"}} {"error": {"class": "GenericError", "desc": "Device name 'test-node' conflicts with an existing node name"}} -main-loop: WARNING: I/O thread spun for 1000 iterations {"error": {"class": "GenericError", "desc": "could not open disk image disk2: node-name=disk is conflicting with a device id"}} {"error": {"class": "GenericError", "desc": "could not open disk image disk2: Duplicate node name"}} {"error": {"class": "GenericError", "desc": "could not open disk image disk3: node-name=disk3 is conflicting with a device id"}} diff --git a/tests/qemu-iotests/099 b/tests/qemu-iotests/099 index 948afff28b..80f3d9aaf3 100755 --- a/tests/qemu-iotests/099 +++ b/tests/qemu-iotests/099 @@ -57,7 +57,7 @@ function run_qemu() # Get the "file": "foo" entry ($foo may only contain escaped double quotes, # which is how we can extract it) do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_imgfmt | _filter_qmp \ - | grep "drv0" \ + | _filter_qemu | grep "drv0" \ | sed -e 's/^.*"file": "\(\(\\"\|[^"]\)*\)".*$/\1/' -e 's/\\"/"/g' } diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 6c14590594..bae96efcf3 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -159,6 +159,7 @@ _filter_qemu() { sed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ + -e '/main-loop: WARNING: I\/O thread spun for [0-9]\+ iterations/d' \ -e $'s#\r##' # QEMU monitor uses \r\n line endings } From 04636dc410b163c2243e66c3813dd4900a50a4ed Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 30 Dec 2014 13:04:16 +0300 Subject: [PATCH 19/38] migration/block: fix pending() return value Because of wrong return value of .save_live_pending() in migration/block.c, migration finishes before the whole disk is transferred. Such situation occurs when the migration process is fast enough, for example when source and dest are on the same host. If in the bulk phase we return something < max_size, we will skip transferring the tail of the device. Currently we have "set pending to BLOCK_SIZE if it is zero" for bulk phase, but there no guarantee, that it will be < max_size. True approach is to return, for example, max_size+1 when we are in the bulk phase. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-id: 1419933856-4018-2-git-send-email-vsementsov@parallels.com Signed-off-by: Stefan Hajnoczi --- migration/block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/migration/block.c b/migration/block.c index a0f908cf98..0c7610600b 100644 --- a/migration/block.c +++ b/migration/block.c @@ -766,8 +766,8 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) block_mig_state.read_done * BLOCK_SIZE; /* Report at least one block pending during bulk phase */ - if (pending == 0 && !block_mig_state.bulk_completed) { - pending = BLOCK_SIZE; + if (pending <= max_size && !block_mig_state.bulk_completed) { + pending = max_size + BLOCK_SIZE; } blk_mig_unlock(); qemu_mutex_unlock_iothread(); From 292be092ad48ac530dd254ada109851e9a2353f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Mar=C3=AD?= Date: Thu, 23 Oct 2014 10:12:42 +0200 Subject: [PATCH 20/38] libqos: Convert malloc-pc allocator to a generic allocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The allocator in malloc-pc has been extracted, so it can be used in every arch. This operation showed that both the alloc and free functions can be also generic. Because of this, the QGuestAllocator has been removed from is function to wrap the alloc and free function, and now just contains the allocator parameters. As a result, only the allocator initalizer and unitializer are arch dependent. Signed-off-by: Marc Marí Reviewed-by: Stefan Hajnoczi Reviewed-by: John Snow Signed-off-by: Stefan Hajnoczi --- tests/Makefile | 2 +- tests/libqos/malloc-pc.c | 280 +-------------------------------------- tests/libqos/malloc-pc.h | 11 +- tests/libqos/malloc.c | 270 +++++++++++++++++++++++++++++++++++++ tests/libqos/malloc.h | 45 ++++--- 5 files changed, 309 insertions(+), 299 deletions(-) create mode 100644 tests/libqos/malloc.c diff --git a/tests/Makefile b/tests/Makefile index e4ddb6a8c1..c2e2e52f22 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -298,7 +298,7 @@ tests/test-opts-visitor$(EXESUF): tests/test-opts-visitor.o $(test-qapi-obj-y) l tests/test-mul64$(EXESUF): tests/test-mul64.o libqemuutil.a tests/test-bitops$(EXESUF): tests/test-bitops.o libqemuutil.a -libqos-obj-y = tests/libqos/pci.o tests/libqos/fw_cfg.o +libqos-obj-y = tests/libqos/pci.o tests/libqos/fw_cfg.o tests/libqos/malloc.o libqos-obj-y += tests/libqos/i2c.o libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o libqos-pc-obj-y += tests/libqos/malloc-pc.o diff --git a/tests/libqos/malloc-pc.c b/tests/libqos/malloc-pc.c index f4218c6451..c9c48fddc9 100644 --- a/tests/libqos/malloc-pc.c +++ b/tests/libqos/malloc-pc.c @@ -17,296 +17,28 @@ #include "hw/nvram/fw_cfg.h" #include "qemu-common.h" -#include "qemu/queue.h" #include #define PAGE_SIZE (4096) -#define MLIST_ENTNAME entries -typedef QTAILQ_HEAD(MemList, MemBlock) MemList; -typedef struct MemBlock { - QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME; - uint64_t size; - uint64_t addr; -} MemBlock; - -typedef struct PCAlloc -{ - QGuestAllocator alloc; - PCAllocOpts opts; - uint64_t start; - uint64_t end; - - MemList used; - MemList free; -} PCAlloc; - -static MemBlock *mlist_new(uint64_t addr, uint64_t size) -{ - MemBlock *block; - - if (!size) { - return NULL; - } - block = g_malloc0(sizeof(MemBlock)); - - block->addr = addr; - block->size = size; - - return block; -} - -static void mlist_delete(MemList *list, MemBlock *node) -{ - g_assert(list && node); - QTAILQ_REMOVE(list, node, MLIST_ENTNAME); - g_free(node); -} - -static MemBlock *mlist_find_key(MemList *head, uint64_t addr) -{ - MemBlock *node; - QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { - if (node->addr == addr) { - return node; - } - } - return NULL; -} - -static MemBlock *mlist_find_space(MemList *head, uint64_t size) -{ - MemBlock *node; - - QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { - if (node->size >= size) { - return node; - } - } - return NULL; -} - -static MemBlock *mlist_sort_insert(MemList *head, MemBlock *insr) -{ - MemBlock *node; - g_assert(head && insr); - - QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { - if (insr->addr < node->addr) { - QTAILQ_INSERT_BEFORE(node, insr, MLIST_ENTNAME); - return insr; - } - } - - QTAILQ_INSERT_TAIL(head, insr, MLIST_ENTNAME); - return insr; -} - -static inline uint64_t mlist_boundary(MemBlock *node) -{ - return node->size + node->addr; -} - -static MemBlock *mlist_join(MemList *head, MemBlock *left, MemBlock *right) -{ - g_assert(head && left && right); - - left->size += right->size; - mlist_delete(head, right); - return left; -} - -static void mlist_coalesce(MemList *head, MemBlock *node) -{ - g_assert(node); - MemBlock *left; - MemBlock *right; - char merge; - - do { - merge = 0; - left = QTAILQ_PREV(node, MemList, MLIST_ENTNAME); - right = QTAILQ_NEXT(node, MLIST_ENTNAME); - - /* clowns to the left of me */ - if (left && mlist_boundary(left) == node->addr) { - node = mlist_join(head, left, node); - merge = 1; - } - - /* jokers to the right */ - if (right && mlist_boundary(node) == right->addr) { - node = mlist_join(head, node, right); - merge = 1; - } - - } while (merge); -} - -static uint64_t pc_mlist_fulfill(PCAlloc *s, MemBlock *freenode, uint64_t size) -{ - uint64_t addr; - MemBlock *usednode; - - g_assert(freenode); - g_assert_cmpint(freenode->size, >=, size); - - addr = freenode->addr; - if (freenode->size == size) { - /* re-use this freenode as our used node */ - QTAILQ_REMOVE(&s->free, freenode, MLIST_ENTNAME); - usednode = freenode; - } else { - /* adjust the free node and create a new used node */ - freenode->addr += size; - freenode->size -= size; - usednode = mlist_new(addr, size); - } - - mlist_sort_insert(&s->used, usednode); - return addr; -} - -/* To assert the correctness of the list. - * Used only if PC_ALLOC_PARANOID is set. */ -static void pc_mlist_check(PCAlloc *s) -{ - MemBlock *node; - uint64_t addr = s->start > 0 ? s->start - 1 : 0; - uint64_t next = s->start; - - QTAILQ_FOREACH(node, &s->free, MLIST_ENTNAME) { - g_assert_cmpint(node->addr, >, addr); - g_assert_cmpint(node->addr, >=, next); - addr = node->addr; - next = node->addr + node->size; - } - - addr = s->start > 0 ? s->start - 1 : 0; - next = s->start; - QTAILQ_FOREACH(node, &s->used, MLIST_ENTNAME) { - g_assert_cmpint(node->addr, >, addr); - g_assert_cmpint(node->addr, >=, next); - addr = node->addr; - next = node->addr + node->size; - } -} - -static uint64_t pc_mlist_alloc(PCAlloc *s, uint64_t size) -{ - MemBlock *node; - - node = mlist_find_space(&s->free, size); - if (!node) { - fprintf(stderr, "Out of guest memory.\n"); - g_assert_not_reached(); - } - return pc_mlist_fulfill(s, node, size); -} - -static void pc_mlist_free(PCAlloc *s, uint64_t addr) -{ - MemBlock *node; - - if (addr == 0) { - return; - } - - node = mlist_find_key(&s->used, addr); - if (!node) { - fprintf(stderr, "Error: no record found for an allocation at " - "0x%016" PRIx64 ".\n", - addr); - g_assert_not_reached(); - } - - /* Rip it out of the used list and re-insert back into the free list. */ - QTAILQ_REMOVE(&s->used, node, MLIST_ENTNAME); - mlist_sort_insert(&s->free, node); - mlist_coalesce(&s->free, node); -} - -static uint64_t pc_alloc(QGuestAllocator *allocator, size_t size) -{ - PCAlloc *s = container_of(allocator, PCAlloc, alloc); - uint64_t rsize = size; - uint64_t naddr; - - rsize += (PAGE_SIZE - 1); - rsize &= -PAGE_SIZE; - g_assert_cmpint((s->start + rsize), <=, s->end); - g_assert_cmpint(rsize, >=, size); - - naddr = pc_mlist_alloc(s, rsize); - if (s->opts & PC_ALLOC_PARANOID) { - pc_mlist_check(s); - } - - return naddr; -} - -static void pc_free(QGuestAllocator *allocator, uint64_t addr) -{ - PCAlloc *s = container_of(allocator, PCAlloc, alloc); - - pc_mlist_free(s, addr); - if (s->opts & PC_ALLOC_PARANOID) { - pc_mlist_check(s); - } -} - /* * Mostly for valgrind happiness, but it does offer * a chokepoint for debugging guest memory leaks, too. */ void pc_alloc_uninit(QGuestAllocator *allocator) { - PCAlloc *s = container_of(allocator, PCAlloc, alloc); - MemBlock *node; - MemBlock *tmp; - PCAllocOpts mask; - - /* Check for guest leaks, and destroy the list. */ - QTAILQ_FOREACH_SAFE(node, &s->used, MLIST_ENTNAME, tmp) { - if (s->opts & (PC_ALLOC_LEAK_WARN | PC_ALLOC_LEAK_ASSERT)) { - fprintf(stderr, "guest malloc leak @ 0x%016" PRIx64 "; " - "size 0x%016" PRIx64 ".\n", - node->addr, node->size); - } - if (s->opts & (PC_ALLOC_LEAK_ASSERT)) { - g_assert_not_reached(); - } - g_free(node); - } - - /* If we have previously asserted that there are no leaks, then there - * should be only one node here with a specific address and size. */ - mask = PC_ALLOC_LEAK_ASSERT | PC_ALLOC_PARANOID; - QTAILQ_FOREACH_SAFE(node, &s->free, MLIST_ENTNAME, tmp) { - if ((s->opts & mask) == mask) { - if ((node->addr != s->start) || - (node->size != s->end - s->start)) { - fprintf(stderr, "Free list is corrupted.\n"); - g_assert_not_reached(); - } - } - - g_free(node); - } - - g_free(s); + alloc_uninit(allocator); } -QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags) +QGuestAllocator *pc_alloc_init_flags(QAllocOpts flags) { - PCAlloc *s = g_malloc0(sizeof(*s)); + QGuestAllocator *s = g_malloc0(sizeof(*s)); uint64_t ram_size; QFWCFG *fw_cfg = pc_fw_cfg_init(); MemBlock *node; s->opts = flags; - s->alloc.alloc = pc_alloc; - s->alloc.free = pc_free; + s->page_size = PAGE_SIZE; ram_size = qfw_cfg_get_u64(fw_cfg, FW_CFG_RAM_SIZE); @@ -325,10 +57,10 @@ QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags) node = mlist_new(s->start, s->end - s->start); QTAILQ_INSERT_HEAD(&s->free, node, MLIST_ENTNAME); - return &s->alloc; + return s; } inline QGuestAllocator *pc_alloc_init(void) { - return pc_alloc_init_flags(PC_ALLOC_NO_FLAGS); + return pc_alloc_init_flags(ALLOC_NO_FLAGS); } diff --git a/tests/libqos/malloc-pc.h b/tests/libqos/malloc-pc.h index 9f525e3b99..86ab9f0429 100644 --- a/tests/libqos/malloc-pc.h +++ b/tests/libqos/malloc-pc.h @@ -15,15 +15,8 @@ #include "libqos/malloc.h" -typedef enum { - PC_ALLOC_NO_FLAGS = 0x00, - PC_ALLOC_LEAK_WARN = 0x01, - PC_ALLOC_LEAK_ASSERT = 0x02, - PC_ALLOC_PARANOID = 0x04 -} PCAllocOpts; - QGuestAllocator *pc_alloc_init(void); -QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags); -void pc_alloc_uninit(QGuestAllocator *allocator); +QGuestAllocator *pc_alloc_init_flags(QAllocOpts flags); +void pc_alloc_uninit(QGuestAllocator *allocator); #endif diff --git a/tests/libqos/malloc.c b/tests/libqos/malloc.c new file mode 100644 index 0000000000..5debf18497 --- /dev/null +++ b/tests/libqos/malloc.c @@ -0,0 +1,270 @@ +/* + * libqos malloc support + * + * Copyright (c) 2014 + * + * Author: + * John Snow + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "libqos/malloc.h" +#include "qemu-common.h" +#include +#include +#include + +static void mlist_delete(MemList *list, MemBlock *node) +{ + g_assert(list && node); + QTAILQ_REMOVE(list, node, MLIST_ENTNAME); + g_free(node); +} + +static MemBlock *mlist_find_key(MemList *head, uint64_t addr) +{ + MemBlock *node; + QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { + if (node->addr == addr) { + return node; + } + } + return NULL; +} + +static MemBlock *mlist_find_space(MemList *head, uint64_t size) +{ + MemBlock *node; + + QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { + if (node->size >= size) { + return node; + } + } + return NULL; +} + +static MemBlock *mlist_sort_insert(MemList *head, MemBlock *insr) +{ + MemBlock *node; + g_assert(head && insr); + + QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { + if (insr->addr < node->addr) { + QTAILQ_INSERT_BEFORE(node, insr, MLIST_ENTNAME); + return insr; + } + } + + QTAILQ_INSERT_TAIL(head, insr, MLIST_ENTNAME); + return insr; +} + +static inline uint64_t mlist_boundary(MemBlock *node) +{ + return node->size + node->addr; +} + +static MemBlock *mlist_join(MemList *head, MemBlock *left, MemBlock *right) +{ + g_assert(head && left && right); + + left->size += right->size; + mlist_delete(head, right); + return left; +} + +static void mlist_coalesce(MemList *head, MemBlock *node) +{ + g_assert(node); + MemBlock *left; + MemBlock *right; + char merge; + + do { + merge = 0; + left = QTAILQ_PREV(node, MemList, MLIST_ENTNAME); + right = QTAILQ_NEXT(node, MLIST_ENTNAME); + + /* clowns to the left of me */ + if (left && mlist_boundary(left) == node->addr) { + node = mlist_join(head, left, node); + merge = 1; + } + + /* jokers to the right */ + if (right && mlist_boundary(node) == right->addr) { + node = mlist_join(head, node, right); + merge = 1; + } + + } while (merge); +} + +static uint64_t mlist_fulfill(QGuestAllocator *s, MemBlock *freenode, + uint64_t size) +{ + uint64_t addr; + MemBlock *usednode; + + g_assert(freenode); + g_assert_cmpint(freenode->size, >=, size); + + addr = freenode->addr; + if (freenode->size == size) { + /* re-use this freenode as our used node */ + QTAILQ_REMOVE(&s->free, freenode, MLIST_ENTNAME); + usednode = freenode; + } else { + /* adjust the free node and create a new used node */ + freenode->addr += size; + freenode->size -= size; + usednode = mlist_new(addr, size); + } + + mlist_sort_insert(&s->used, usednode); + return addr; +} + +/* To assert the correctness of the list. + * Used only if ALLOC_PARANOID is set. */ +static void mlist_check(QGuestAllocator *s) +{ + MemBlock *node; + uint64_t addr = s->start > 0 ? s->start - 1 : 0; + uint64_t next = s->start; + + QTAILQ_FOREACH(node, &s->free, MLIST_ENTNAME) { + g_assert_cmpint(node->addr, >, addr); + g_assert_cmpint(node->addr, >=, next); + addr = node->addr; + next = node->addr + node->size; + } + + addr = s->start > 0 ? s->start - 1 : 0; + next = s->start; + QTAILQ_FOREACH(node, &s->used, MLIST_ENTNAME) { + g_assert_cmpint(node->addr, >, addr); + g_assert_cmpint(node->addr, >=, next); + addr = node->addr; + next = node->addr + node->size; + } +} + +static uint64_t mlist_alloc(QGuestAllocator *s, uint64_t size) +{ + MemBlock *node; + + node = mlist_find_space(&s->free, size); + if (!node) { + fprintf(stderr, "Out of guest memory.\n"); + g_assert_not_reached(); + } + return mlist_fulfill(s, node, size); +} + +static void mlist_free(QGuestAllocator *s, uint64_t addr) +{ + MemBlock *node; + + if (addr == 0) { + return; + } + + node = mlist_find_key(&s->used, addr); + if (!node) { + fprintf(stderr, "Error: no record found for an allocation at " + "0x%016" PRIx64 ".\n", + addr); + g_assert_not_reached(); + } + + /* Rip it out of the used list and re-insert back into the free list. */ + QTAILQ_REMOVE(&s->used, node, MLIST_ENTNAME); + mlist_sort_insert(&s->free, node); + mlist_coalesce(&s->free, node); +} + +MemBlock *mlist_new(uint64_t addr, uint64_t size) +{ + MemBlock *block; + + if (!size) { + return NULL; + } + block = g_malloc0(sizeof(MemBlock)); + + block->addr = addr; + block->size = size; + + return block; +} + +/* + * Mostly for valgrind happiness, but it does offer + * a chokepoint for debugging guest memory leaks, too. + */ +void alloc_uninit(QGuestAllocator *allocator) +{ + MemBlock *node; + MemBlock *tmp; + QAllocOpts mask; + + /* Check for guest leaks, and destroy the list. */ + QTAILQ_FOREACH_SAFE(node, &allocator->used, MLIST_ENTNAME, tmp) { + if (allocator->opts & (ALLOC_LEAK_WARN | ALLOC_LEAK_ASSERT)) { + fprintf(stderr, "guest malloc leak @ 0x%016" PRIx64 "; " + "size 0x%016" PRIx64 ".\n", + node->addr, node->size); + } + if (allocator->opts & (ALLOC_LEAK_ASSERT)) { + g_assert_not_reached(); + } + g_free(node); + } + + /* If we have previously asserted that there are no leaks, then there + * should be only one node here with a specific address and size. */ + mask = ALLOC_LEAK_ASSERT | ALLOC_PARANOID; + QTAILQ_FOREACH_SAFE(node, &allocator->free, MLIST_ENTNAME, tmp) { + if ((allocator->opts & mask) == mask) { + if ((node->addr != allocator->start) || + (node->size != allocator->end - allocator->start)) { + fprintf(stderr, "Free list is corrupted.\n"); + g_assert_not_reached(); + } + } + + g_free(node); + } + + g_free(allocator); +} + +uint64_t guest_alloc(QGuestAllocator *allocator, size_t size) +{ + uint64_t rsize = size; + uint64_t naddr; + + rsize += (allocator->page_size - 1); + rsize &= -allocator->page_size; + g_assert_cmpint((allocator->start + rsize), <=, allocator->end); + g_assert_cmpint(rsize, >=, size); + + naddr = mlist_alloc(allocator, rsize); + if (allocator->opts & ALLOC_PARANOID) { + mlist_check(allocator); + } + + return naddr; +} + +void guest_free(QGuestAllocator *allocator, uint64_t addr) +{ + mlist_free(allocator, addr); + if (allocator->opts & ALLOC_PARANOID) { + mlist_check(allocator); + } +} diff --git a/tests/libqos/malloc.h b/tests/libqos/malloc.h index 556538121e..465efeb8fb 100644 --- a/tests/libqos/malloc.h +++ b/tests/libqos/malloc.h @@ -15,24 +15,39 @@ #include #include +#include "qemu/queue.h" -typedef struct QGuestAllocator QGuestAllocator; +#define MLIST_ENTNAME entries -struct QGuestAllocator -{ - uint64_t (*alloc)(QGuestAllocator *allocator, size_t size); - void (*free)(QGuestAllocator *allocator, uint64_t addr); -}; +typedef enum { + ALLOC_NO_FLAGS = 0x00, + ALLOC_LEAK_WARN = 0x01, + ALLOC_LEAK_ASSERT = 0x02, + ALLOC_PARANOID = 0x04 +} QAllocOpts; + +typedef QTAILQ_HEAD(MemList, MemBlock) MemList; +typedef struct MemBlock { + QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME; + uint64_t size; + uint64_t addr; +} MemBlock; + +typedef struct QGuestAllocator { + QAllocOpts opts; + uint64_t start; + uint64_t end; + uint32_t page_size; + + MemList used; + MemList free; +} QGuestAllocator; + +MemBlock *mlist_new(uint64_t addr, uint64_t size); +void alloc_uninit(QGuestAllocator *allocator); /* Always returns page aligned values */ -static inline uint64_t guest_alloc(QGuestAllocator *allocator, size_t size) -{ - return allocator->alloc(allocator, size); -} - -static inline void guest_free(QGuestAllocator *allocator, uint64_t addr) -{ - allocator->free(allocator, addr); -} +uint64_t guest_alloc(QGuestAllocator *allocator, size_t size); +void guest_free(QGuestAllocator *allocator, uint64_t addr); #endif From 1dbe67503b8d35cec797d2b8e742f11e4553cf3b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Sun, 4 Jan 2015 09:53:46 +0800 Subject: [PATCH 21/38] .gitignore: Ignore generated "common.env" Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e32a58417a..090f974cb9 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,4 @@ cscope.* tags TAGS *~ +/tests/qemu-iotests/common.env From a2d9c0c407456faa871870c57cf99942739e28ab Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Sun, 4 Jan 2015 09:53:48 +0800 Subject: [PATCH 22/38] qemu-iotests: Replace "/bin/true" with "true" The former is not portable because on Mac OSX it is /usr/bin/true. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/common.config | 2 +- tests/qemu-iotests/common.filter | 2 +- tests/qemu-iotests/common.rc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index 91a5ef696b..a1973ad9d0 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -155,4 +155,4 @@ _readlink() } # make sure this script returns success -/bin/true +true diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index bae96efcf3..b73c70be95 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -224,4 +224,4 @@ _filter_qemu_img_map() } # make sure this script returns success -/bin/true +true diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 3b14053790..aa093d9d84 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -490,4 +490,4 @@ _die() } # make sure this script returns success -/bin/true +true From 9c8ab1ae0d7c4a135c832c784f088ae5e2140585 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Sun, 4 Jan 2015 09:53:49 +0800 Subject: [PATCH 23/38] qemu-iotests: Add "_supported_os Linux" to 058 Other cases have this, and this test is not portable as well, as we want to add "make check-block" to "make check", it shouldn't fail on Mac OS X. Reported-by: Peter Maydell Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/058 | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058 index 2d5ca85ddc..a60b34b46c 100755 --- a/tests/qemu-iotests/058 +++ b/tests/qemu-iotests/058 @@ -87,6 +87,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto file +_supported_os Linux _require_command QEMU_NBD # Use -f raw instead of -f $IMGFMT for the NBD connection From bc521696607c5348fcd8a9e57b408d0ac0dbe2f8 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Sun, 4 Jan 2015 09:53:52 +0800 Subject: [PATCH 24/38] qemu-iotests: Add supported os parameter for python tests If I understand correctly, qemu-iotests never meant to be portable. We only support Linux for all the shell cases, but didn't specify it for python tests. Now add this and default all the python tests as Linux only. If we cares enough later, we can override the parameter in individual cases. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/iotests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index f57f1548ac..87002e0e2c 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -282,12 +282,15 @@ def notrun(reason): print '%s not run: %s' % (seq, reason) sys.exit(0) -def main(supported_fmts=[]): +def main(supported_fmts=[], supported_oses=['linux']): '''Run tests''' if supported_fmts and (imgfmt not in supported_fmts): notrun('not suitable for this image format: %s' % imgfmt) + if sys.platform not in supported_oses: + notrun('not suitable for this OS: %s' % sys.platform) + # We need to filter out the time taken from the output so that qemu-iotest # can reliably diff the results against master output. import StringIO From d1d1b206b07977fe0e75b0254e5b50c215c97803 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Dec 2014 12:05:44 +0100 Subject: [PATCH 25/38] coroutine-ucontext: use __thread ELF thread local storage is about 10% faster on tests/test-coroutine's perf/cost test. The timing on my machine is 190ns per iteration with pthread TLS, 170 with ELF TLS. Based on a patch by Kevin Wolf and Peter Lieven, but redone to follow the model of coroutine-win32.c (including the important "noinline" attribute!). Platforms without thread-local storage (OpenBSD probably?) will need a new-enough GCC for this to compile, in order to use the same emutls support that Windows already relies on. Signed-off-by: Paolo Bonzini Reviewed-by: Fam Zheng Message-id: 1417518350-6167-2-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi --- coroutine-ucontext.c | 69 ++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 50 deletions(-) diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c index 4bf2cde279..259fcb48a4 100644 --- a/coroutine-ucontext.c +++ b/coroutine-ucontext.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include "qemu-common.h" #include "block/coroutine_int.h" @@ -48,15 +47,8 @@ typedef struct { /** * Per-thread coroutine bookkeeping */ -typedef struct { - /** Currently executing coroutine */ - Coroutine *current; - - /** The default coroutine */ - CoroutineUContext leader; -} CoroutineThreadState; - -static pthread_key_t thread_state_key; +static __thread CoroutineUContext leader; +static __thread Coroutine *current; /* * va_args to makecontext() must be type 'int', so passing @@ -68,36 +60,6 @@ union cc_arg { int i[2]; }; -static CoroutineThreadState *coroutine_get_thread_state(void) -{ - CoroutineThreadState *s = pthread_getspecific(thread_state_key); - - if (!s) { - s = g_malloc0(sizeof(*s)); - s->current = &s->leader.base; - pthread_setspecific(thread_state_key, s); - } - return s; -} - -static void qemu_coroutine_thread_cleanup(void *opaque) -{ - CoroutineThreadState *s = opaque; - - g_free(s); -} - -static void __attribute__((constructor)) coroutine_init(void) -{ - int ret; - - ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup); - if (ret != 0) { - fprintf(stderr, "unable to create leader key: %s\n", strerror(errno)); - abort(); - } -} - static void coroutine_trampoline(int i0, int i1) { union cc_arg arg; @@ -193,15 +155,23 @@ void qemu_coroutine_delete(Coroutine *co_) g_free(co); } -CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, - CoroutineAction action) +/* This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). If we allow it to do that then it + * hoists the code to get the address of the TLS variable "current" + * out of the while() loop. This is an invalid transformation because + * the sigsetjmp() call may be called when running thread A but + * return in thread B, and so we might be in a different thread + * context each time round the loop. + */ +CoroutineAction __attribute__((noinline)) +qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) { CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); - CoroutineThreadState *s = coroutine_get_thread_state(); int ret; - s->current = to_; + current = to_; ret = sigsetjmp(from->env, 0); if (ret == 0) { @@ -212,14 +182,13 @@ CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, Coroutine *qemu_coroutine_self(void) { - CoroutineThreadState *s = coroutine_get_thread_state(); - - return s->current; + if (!current) { + current = &leader.base; + } + return current; } bool qemu_in_coroutine(void) { - CoroutineThreadState *s = pthread_getspecific(thread_state_key); - - return s && s->current->caller; + return current && current->caller; } From ef57137f1b30545a477ab798d34a669b0abf5320 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Dec 2014 12:05:45 +0100 Subject: [PATCH 26/38] qemu-thread: add per-thread atexit functions Destructors are the main additional feature of pthread TLS compared to __thread. If we were using C++ (hint, hint!) we could have used thread-local objects with a destructor. Since we are not, instead, we add a simple Notifier-based API. Note that the notifier must be per-thread as well. We can add a global list as well later, perhaps. The Win32 implementation has some complications because a) detached threads used not to have a QemuThreadData; b) the main thread does not go through win32_start_routine, so we have to use atexit too. Signed-off-by: Paolo Bonzini Reviewed-by: Fam Zheng Message-id: 1417518350-6167-3-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi --- include/qemu/thread.h | 4 ++++ util/qemu-thread-posix.c | 37 +++++++++++++++++++++++++++++++ util/qemu-thread-win32.c | 48 +++++++++++++++++++++++++++++++--------- 3 files changed, 78 insertions(+), 11 deletions(-) diff --git a/include/qemu/thread.h b/include/qemu/thread.h index f7e3b9b290..e89fdc9785 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -61,4 +61,8 @@ bool qemu_thread_is_self(QemuThread *thread); void qemu_thread_exit(void *retval); void qemu_thread_naming(bool enable); +struct Notifier; +void qemu_thread_atexit_add(struct Notifier *notifier); +void qemu_thread_atexit_remove(struct Notifier *notifier); + #endif diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index d05a6497e1..41cb23df0c 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -26,6 +26,7 @@ #endif #include "qemu/thread.h" #include "qemu/atomic.h" +#include "qemu/notify.h" static bool name_threads; @@ -401,6 +402,42 @@ void qemu_event_wait(QemuEvent *ev) } } +static pthread_key_t exit_key; + +union NotifierThreadData { + void *ptr; + NotifierList list; +}; +QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *)); + +void qemu_thread_atexit_add(Notifier *notifier) +{ + union NotifierThreadData ntd; + ntd.ptr = pthread_getspecific(exit_key); + notifier_list_add(&ntd.list, notifier); + pthread_setspecific(exit_key, ntd.ptr); +} + +void qemu_thread_atexit_remove(Notifier *notifier) +{ + union NotifierThreadData ntd; + ntd.ptr = pthread_getspecific(exit_key); + notifier_remove(notifier); + pthread_setspecific(exit_key, ntd.ptr); +} + +static void qemu_thread_atexit_run(void *arg) +{ + union NotifierThreadData ntd = { .ptr = arg }; + notifier_list_notify(&ntd.list, NULL); +} + +static void __attribute__((constructor)) qemu_thread_atexit_init(void) +{ + pthread_key_create(&exit_key, qemu_thread_atexit_run); +} + + /* Attempt to set the threads name; note that this is for debug, so * we're not going to fail if we can't set it. */ diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index c405c9bef6..406b52f91d 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -12,6 +12,7 @@ */ #include "qemu-common.h" #include "qemu/thread.h" +#include "qemu/notify.h" #include #include #include @@ -268,6 +269,7 @@ struct QemuThreadData { void *(*start_routine)(void *); void *arg; short mode; + NotifierList exit; /* Only used for joinable threads. */ bool exited; @@ -275,18 +277,40 @@ struct QemuThreadData { CRITICAL_SECTION cs; }; +static bool atexit_registered; +static NotifierList main_thread_exit; + static __thread QemuThreadData *qemu_thread_data; +static void run_main_thread_exit(void) +{ + notifier_list_notify(&main_thread_exit, NULL); +} + +void qemu_thread_atexit_add(Notifier *notifier) +{ + if (!qemu_thread_data) { + if (!atexit_registered) { + atexit_registered = true; + atexit(run_main_thread_exit); + } + notifier_list_add(&main_thread_exit, notifier); + } else { + notifier_list_add(&qemu_thread_data->exit, notifier); + } +} + +void qemu_thread_atexit_remove(Notifier *notifier) +{ + notifier_remove(notifier); +} + static unsigned __stdcall win32_start_routine(void *arg) { QemuThreadData *data = (QemuThreadData *) arg; void *(*start_routine)(void *) = data->start_routine; void *thread_arg = data->arg; - if (data->mode == QEMU_THREAD_DETACHED) { - g_free(data); - data = NULL; - } qemu_thread_data = data; qemu_thread_exit(start_routine(thread_arg)); abort(); @@ -296,12 +320,14 @@ void qemu_thread_exit(void *arg) { QemuThreadData *data = qemu_thread_data; - if (data) { - assert(data->mode != QEMU_THREAD_DETACHED); + notifier_list_notify(&data->exit, NULL); + if (data->mode == QEMU_THREAD_JOINABLE) { data->ret = arg; EnterCriticalSection(&data->cs); data->exited = true; LeaveCriticalSection(&data->cs); + } else { + g_free(data); } _endthreadex(0); } @@ -313,9 +339,10 @@ void *qemu_thread_join(QemuThread *thread) HANDLE handle; data = thread->data; - if (!data) { + if (data->mode == QEMU_THREAD_DETACHED) { return NULL; } + /* * Because multiple copies of the QemuThread can exist via * qemu_thread_get_self, we need to store a value that cannot @@ -329,7 +356,6 @@ void *qemu_thread_join(QemuThread *thread) CloseHandle(handle); } ret = data->ret; - assert(data->mode != QEMU_THREAD_DETACHED); DeleteCriticalSection(&data->cs); g_free(data); return ret; @@ -347,6 +373,7 @@ void qemu_thread_create(QemuThread *thread, const char *name, data->arg = arg; data->mode = mode; data->exited = false; + notifier_list_init(&data->exit); if (data->mode != QEMU_THREAD_DETACHED) { InitializeCriticalSection(&data->cs); @@ -358,7 +385,7 @@ void qemu_thread_create(QemuThread *thread, const char *name, error_exit(GetLastError(), __func__); } CloseHandle(hThread); - thread->data = (mode == QEMU_THREAD_DETACHED) ? NULL : data; + thread->data = data; } void qemu_thread_get_self(QemuThread *thread) @@ -373,11 +400,10 @@ HANDLE qemu_thread_get_handle(QemuThread *thread) HANDLE handle; data = thread->data; - if (!data) { + if (data->mode == QEMU_THREAD_DETACHED) { return NULL; } - assert(data->mode != QEMU_THREAD_DETACHED); EnterCriticalSection(&data->cs); if (!data->exited) { handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE, From 6d86ae0824bdd6175dd3874688a871e981093888 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Dec 2014 12:05:46 +0100 Subject: [PATCH 27/38] test-coroutine: avoid overflow on 32-bit systems unsigned long is not large enough to represent 1000000000 * duration there. Just use floating point. Signed-off-by: Paolo Bonzini Reviewed-by: Fam Zheng Message-id: 1417518350-6167-4-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi --- tests/test-coroutine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-coroutine.c b/tests/test-coroutine.c index e22fae170a..27d1b6f8e8 100644 --- a/tests/test-coroutine.c +++ b/tests/test-coroutine.c @@ -337,7 +337,7 @@ static void perf_cost(void) "%luns per coroutine", maxcycles, duration, ops, - (unsigned long)(1000000000 * duration) / maxcycles); + (unsigned long)(1000000000.0 * duration / maxcycles)); } int main(int argc, char **argv) From c740ad92d0d958fa785e5d7aa1b67ecaf30a6a54 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Dec 2014 12:05:47 +0100 Subject: [PATCH 28/38] QSLIST: add lock-free operations These operations are trivial to implement and do not have ABA problems. They are enough to implement simple multiple-producer, single consumer lock-free lists or, as in the next patch, the multiple consumers can steal a whole batch of elements and process them at their leisure. Signed-off-by: Paolo Bonzini Reviewed-by: Fam Zheng Message-id: 1417518350-6167-5-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi --- include/qemu/queue.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/qemu/queue.h b/include/qemu/queue.h index 42bcadfbb1..a98eb3ad79 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -191,8 +191,19 @@ struct { \ } while (/*CONSTCOND*/0) #define QSLIST_INSERT_HEAD(head, elm, field) do { \ - (elm)->field.sle_next = (head)->slh_first; \ - (head)->slh_first = (elm); \ + (elm)->field.sle_next = (head)->slh_first; \ + (head)->slh_first = (elm); \ +} while (/*CONSTCOND*/0) + +#define QSLIST_INSERT_HEAD_ATOMIC(head, elm, field) do { \ + do { \ + (elm)->field.sle_next = (head)->slh_first; \ + } while (atomic_cmpxchg(&(head)->slh_first, (elm)->field.sle_next, \ + (elm)) != (elm)->field.sle_next); \ +} while (/*CONSTCOND*/0) + +#define QSLIST_MOVE_ATOMIC(dest, src) do { \ + (dest)->slh_first = atomic_xchg(&(src)->slh_first, NULL); \ } while (/*CONSTCOND*/0) #define QSLIST_REMOVE_HEAD(head, field) do { \ From 4d68e86bb10159099da0798f74e7512955f15eec Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Dec 2014 12:05:48 +0100 Subject: [PATCH 29/38] coroutine: rewrite pool to avoid mutex This patch removes the mutex by using fancy lock-free manipulation of the pool. Lock-free stacks and queues are not hard, but they can suffer from the ABA problem so they are better avoided unless you have some deferred reclamation scheme like RCU. Otherwise you have to stick with adding to a list, and emptying it completely. This is what this patch does, by coupling a lock-free global list of available coroutines with per-CPU lists that are actually used on coroutine creation. Whenever the destruction pool is big enough, the next thread that runs out of coroutines will steal the whole destruction pool. This is positive in two ways: 1) the allocation does not have to do any atomic operation in the fast path, it's entirely using thread-local storage. Once every POOL_BATCH_SIZE allocations it will do a single atomic_xchg. Release does an atomic_cmpxchg loop, that hopefully doesn't cause any starvation, and an atomic_inc. A later patch will also remove atomic operations from the release path, and try to avoid the atomic_xchg altogether---succeeding in doing so if all devices either use ioeventfd or are not submitting requests actively. 2) in theory this should be completely adaptive. The number of coroutines around should be a little more than POOL_BATCH_SIZE * number of allocating threads; so this also empties qemu_coroutine_adjust_pool_size. (The previous pool size was POOL_BATCH_SIZE * number of block backends, so it was a bit more generous. But if you actually have many high-iodepth disks, it's better to put them in different iothreads, which will also use separate thread pools and aio=native file descriptors). This speeds up perf/cost (in tests/test-coroutine) by a factor of ~1.33. No matter if we end with some kind of coroutine bypass scheme or not, it cannot hurt to optimize hot code. Signed-off-by: Paolo Bonzini Reviewed-by: Fam Zheng Message-id: 1417518350-6167-6-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi --- qemu-coroutine.c | 94 ++++++++++++++++++++++-------------------------- 1 file changed, 42 insertions(+), 52 deletions(-) diff --git a/qemu-coroutine.c b/qemu-coroutine.c index bd574aa1b5..93fddc7dfb 100644 --- a/qemu-coroutine.c +++ b/qemu-coroutine.c @@ -15,31 +15,57 @@ #include "trace.h" #include "qemu-common.h" #include "qemu/thread.h" +#include "qemu/atomic.h" #include "block/coroutine.h" #include "block/coroutine_int.h" enum { - POOL_DEFAULT_SIZE = 64, + POOL_BATCH_SIZE = 64, }; /** Free list to speed up creation */ -static QemuMutex pool_lock; -static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool); -static unsigned int pool_size; -static unsigned int pool_max_size = POOL_DEFAULT_SIZE; +static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); +static unsigned int release_pool_size; +static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); +static __thread Notifier coroutine_pool_cleanup_notifier; + +static void coroutine_pool_cleanup(Notifier *n, void *value) +{ + Coroutine *co; + Coroutine *tmp; + + QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { + QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); + qemu_coroutine_delete(co); + } +} Coroutine *qemu_coroutine_create(CoroutineEntry *entry) { Coroutine *co = NULL; if (CONFIG_COROUTINE_POOL) { - qemu_mutex_lock(&pool_lock); - co = QSLIST_FIRST(&pool); - if (co) { - QSLIST_REMOVE_HEAD(&pool, pool_next); - pool_size--; + co = QSLIST_FIRST(&alloc_pool); + if (!co) { + if (release_pool_size > POOL_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. */ + if (!coroutine_pool_cleanup_notifier.notify) { + coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; + qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); + } + + /* This is not exact; there could be a little skew between + * release_pool_size and the actual size of release_pool. But + * it is just a heuristic, it does not need to be perfect. + */ + release_pool_size = 0; + QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); + co = QSLIST_FIRST(&alloc_pool); + } + } + if (co) { + QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); } - qemu_mutex_unlock(&pool_lock); } if (!co) { @@ -53,39 +79,19 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry) static void coroutine_delete(Coroutine *co) { + co->caller = NULL; + if (CONFIG_COROUTINE_POOL) { - qemu_mutex_lock(&pool_lock); - if (pool_size < pool_max_size) { - QSLIST_INSERT_HEAD(&pool, co, pool_next); - co->caller = NULL; - pool_size++; - qemu_mutex_unlock(&pool_lock); + if (release_pool_size < POOL_BATCH_SIZE * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); + atomic_inc(&release_pool_size); return; } - qemu_mutex_unlock(&pool_lock); } qemu_coroutine_delete(co); } -static void __attribute__((constructor)) coroutine_pool_init(void) -{ - qemu_mutex_init(&pool_lock); -} - -static void __attribute__((destructor)) coroutine_pool_cleanup(void) -{ - Coroutine *co; - Coroutine *tmp; - - QSLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) { - QSLIST_REMOVE_HEAD(&pool, pool_next); - qemu_coroutine_delete(co); - } - - qemu_mutex_destroy(&pool_lock); -} - static void coroutine_swap(Coroutine *from, Coroutine *to) { CoroutineAction ret; @@ -140,20 +146,4 @@ void coroutine_fn qemu_coroutine_yield(void) void qemu_coroutine_adjust_pool_size(int n) { - qemu_mutex_lock(&pool_lock); - - pool_max_size += n; - - /* Callers should never take away more than they added */ - assert(pool_max_size >= POOL_DEFAULT_SIZE); - - /* Trim oversized pool down to new max */ - while (pool_size > pool_max_size) { - Coroutine *co = QSLIST_FIRST(&pool); - QSLIST_REMOVE_HEAD(&pool, pool_next); - pool_size--; - qemu_coroutine_delete(co); - } - - qemu_mutex_unlock(&pool_lock); } From 66552b894bd68dd6539fb6d656ad2c21bdd6acbe Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Dec 2014 12:05:49 +0100 Subject: [PATCH 30/38] coroutine: drop qemu_coroutine_adjust_pool_size This is not needed anymore. The new TLS-based algorithm is adaptive. Signed-off-by: Paolo Bonzini Reviewed-by: Fam Zheng Message-id: 1417518350-6167-7-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi --- block/block-backend.c | 4 ---- include/block/coroutine.h | 10 ---------- qemu-coroutine.c | 4 ---- 3 files changed, 18 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index ef16d7356a..d00c129f15 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -260,9 +260,6 @@ int blk_attach_dev(BlockBackend *blk, void *dev) blk_ref(blk); blk->dev = dev; bdrv_iostatus_reset(blk->bs); - - /* We're expecting I/O from the device so bump up coroutine pool size */ - qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION); return 0; } @@ -290,7 +287,6 @@ void blk_detach_dev(BlockBackend *blk, void *dev) blk->dev_ops = NULL; blk->dev_opaque = NULL; bdrv_set_guest_block_size(blk->bs, 512); - qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION); blk_unref(blk); } diff --git a/include/block/coroutine.h b/include/block/coroutine.h index 793df0ef8b..20c027a7fd 100644 --- a/include/block/coroutine.h +++ b/include/block/coroutine.h @@ -216,14 +216,4 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, */ void coroutine_fn yield_until_fd_readable(int fd); -/** - * Add or subtract from the coroutine pool size - * - * The coroutine implementation keeps a pool of coroutines to be reused by - * qemu_coroutine_create(). This makes coroutine creation cheap. Heavy - * coroutine users should call this to reserve pool space. Call it again with - * a negative number to release pool space. - */ -void qemu_coroutine_adjust_pool_size(int n); - #endif /* QEMU_COROUTINE_H */ diff --git a/qemu-coroutine.c b/qemu-coroutine.c index 93fddc7dfb..da1b9615d0 100644 --- a/qemu-coroutine.c +++ b/qemu-coroutine.c @@ -143,7 +143,3 @@ void coroutine_fn qemu_coroutine_yield(void) self->caller = NULL; coroutine_swap(self, to); } - -void qemu_coroutine_adjust_pool_size(int n) -{ -} From 51a2219bdceed16e81c6e2e2f08aed39c579728f Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 2 Dec 2014 12:05:50 +0100 Subject: [PATCH 31/38] coroutine: try harder not to delete coroutines Placing coroutines on the global pool should be preferrable, because it can help all threads. But if the global pool is full, we can still try to save some allocations by stashing completed coroutines on the local pool. This is quite cheap too, because it does not require atomic operations, and provides a gain of 15% in the best case. Signed-off-by: Peter Lieven Signed-off-by: Paolo Bonzini Reviewed-by: Fam Zheng Message-id: 1417518350-6167-8-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi --- qemu-coroutine.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qemu-coroutine.c b/qemu-coroutine.c index da1b9615d0..525247b050 100644 --- a/qemu-coroutine.c +++ b/qemu-coroutine.c @@ -27,6 +27,7 @@ enum { static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); static unsigned int release_pool_size; static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); +static __thread unsigned int alloc_pool_size; static __thread Notifier coroutine_pool_cleanup_notifier; static void coroutine_pool_cleanup(Notifier *n, void *value) @@ -58,13 +59,14 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry) * release_pool_size and the actual size of release_pool. But * it is just a heuristic, it does not need to be perfect. */ - release_pool_size = 0; + alloc_pool_size = atomic_xchg(&release_pool_size, 0); QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); co = QSLIST_FIRST(&alloc_pool); } } if (co) { QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); + alloc_pool_size--; } } @@ -87,6 +89,11 @@ static void coroutine_delete(Coroutine *co) atomic_inc(&release_pool_size); return; } + if (alloc_pool_size < POOL_BATCH_SIZE) { + QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); + alloc_pool_size++; + return; + } } qemu_coroutine_delete(co); From 095e4fa4b56cf511cb41005872eeace9a2f24582 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Mon, 5 Jan 2015 12:29:49 +0100 Subject: [PATCH 32/38] block: limited request size in write zeroes unsupported path If bs->bl.max_write_zeroes is large and we end up in the unsupported path we might allocate a lot of memory for the iovector and/or even generate an oversized requests. Fix this by limiting the request by the minimum of the reported maximum transfer size or 16MB (32768 sectors). Reported-by: Denis V. Lunev Signed-off-by: Peter Lieven Reviewed-by: Denis V. Lunev Message-id: 1420457389-16332-1-git-send-email-pl@kamp.de Signed-off-by: Stefan Hajnoczi --- block.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index e76a223eae..371d0f6745 100644 --- a/block.c +++ b/block.c @@ -3244,6 +3244,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, if (ret == -ENOTSUP) { /* Fall back to bounce buffer if write zeroes is unsupported */ + int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length, + MAX_WRITE_ZEROES_DEFAULT); + num = MIN(num, max_xfer_len); iov.iov_len = num * BDRV_SECTOR_SIZE; if (iov.iov_base == NULL) { iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE); @@ -3260,7 +3263,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, /* Keep bounce buffer around if it is big enough for all * all future requests. */ - if (num < max_write_zeroes) { + if (num < max_xfer_len) { qemu_vfree(iov.iov_base); iov.iov_base = NULL; } From bb00021de0b5908bc2c3ca467ad9a2b0c9c36459 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:14:00 +0800 Subject: [PATCH 33/38] block: Split BLOCK_OP_TYPE_COMMIT to BLOCK_OP_TYPE_COMMIT_{SOURCE, TARGET} Like BLOCK_OP_TYPE_BACKUP_SOURCE and BLOCK_OP_TYPE_BACKUP_TARGET, block-commit involves two asymmetric devices. This change is not user-visible (yet), because commit only works with device names. But once we enable backing reference in blockdev-add, or specifying node-name in block-commit command, we don't want the user to start two commit jobs on the same backing chain, which will corrupt things because of the final bdrv_swap. Before we have per category blockers, splitting this type is still better. [Resolved virtio-blk dataplane conflict by replacing BLOCK_OP_TYPE_COMMIT with both BLOCK_OP_TYPE_COMMIT_{SOURCE, TARGET}. They are safe since the block job runs in the same AioContext as the dataplane IOThread. --Stefan] Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block.c | 6 +++--- blockdev.c | 6 +++++- hw/block/dataplane/virtio-blk.c | 3 ++- include/block/block.h | 3 ++- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index 371d0f6745..cbe4a32a5a 100644 --- a/block.c +++ b/block.c @@ -1200,7 +1200,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); /* Otherwise we won't be able to commit due to check in bdrv_commit */ - bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, + bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, bs->backing_blocker); out: bdrv_refresh_limits(bs, NULL); @@ -2216,8 +2216,8 @@ int bdrv_commit(BlockDriverState *bs) return -ENOTSUP; } - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) || - bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) { + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || + bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { return -EBUSY; } diff --git a/blockdev.c b/blockdev.c index f2b3b25c10..d59efd3f15 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2218,7 +2218,7 @@ void qmp_block_commit(const char *device, /* drain all i/o before commits */ bdrv_drain_all(); - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) { + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) { goto out; } @@ -2251,6 +2251,10 @@ void qmp_block_commit(const char *device, assert(bdrv_get_aio_context(base_bs) == aio_context); + if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { + goto out; + } + /* Do not allow attempts to commit an image into itself */ if (top_bs == base_bs) { error_setg(errp, "cannot commit an image into itself"); diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 2a28978cba..39c5d7103c 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -198,7 +198,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_BACKUP_SOURCE, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_CHANGE, s->blocker); - blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_SOURCE, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_TARGET, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EJECT, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, s->blocker); diff --git a/include/block/block.h b/include/block/block.h index 760e78b71d..3082d2b80e 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -133,7 +133,8 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_BACKUP_SOURCE, BLOCK_OP_TYPE_BACKUP_TARGET, BLOCK_OP_TYPE_CHANGE, - BLOCK_OP_TYPE_COMMIT, + BLOCK_OP_TYPE_COMMIT_SOURCE, + BLOCK_OP_TYPE_COMMIT_TARGET, BLOCK_OP_TYPE_DATAPLANE, BLOCK_OP_TYPE_DRIVE_DEL, BLOCK_OP_TYPE_EJECT, From 9a502563eef7d7c2c9120b237059426e229eefe9 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 10 Dec 2014 13:17:07 -0500 Subject: [PATCH 34/38] ide: Implement VPD response for ATAPI SCSI devices have multiple kinds of queries they need to respond to, as defined in the "cmd inquiry" section in MMC-6 and SPC-3. Relevent sections: MMC-6 revision 2g: Non-VPD response data and pointer to SPC-3; Section 6.8 "Inquiry Command" SPC-3 revision 23: Inquiry command and error handling: Section 6.4 "INQUIRY command" VPD data pages format: Section 7.6 "Vital product data parameters" We implement these Vital Product Data queries for SCSI, but not for ATAPI through IDE. The result is that if you are looking for the WWN identifier via tools such as sg3_utils, you will be unable to query our CD/DVD rom device to obtain it. This patch adds the minimum number of mandatory responses as defined by SPC-3, which include the "supported pages" response (page 0x00) and the "Device Identification" response (page 0x83). It also correctly responds when it receives a request for an illegal page to improve error output from related tools. The Device ID page contains an arbitrary list of identification strings of various formats; the ID strings included in this patch were chosen to mimic those provided by the libata driver when emulating this SCSI query (model, serial, and wwn when present.) Example: # libata emulated response [root@localhost ~]# sg_inq --id /dev/sda VPD INQUIRY: Device Identification page Designation descriptor number 1, descriptor length: 24 designator_type: vendor specific [0x0], code_set: ASCII associated with the addressed logical unit vendor specific: QM00001 Designation descriptor number 2, descriptor length: 72 designator_type: T10 vendor identification, code_set: ASCII associated with the addressed logical unit vendor id: ATA vendor specific: QEMU HARDDISK QM00001 # QEMU generated ATAPI response, with WWN [root@localhost ~]# sg_inq --id /dev/sr0 VPD INQUIRY: Device Identification page Designation descriptor number 1, descriptor length: 24 designator_type: vendor specific [0x0], code_set: ASCII associated with the addressed logical unit vendor specific: QM00005 Designation descriptor number 2, descriptor length: 72 designator_type: T10 vendor identification, code_set: ASCII associated with the addressed logical unit vendor id: ATA vendor specific: QEMU DVD-ROM QM00005 Designation descriptor number 3, descriptor length: 12 designator_type: NAA, code_set: Binary associated with the addressed logical unit NAA 5, IEEE Company_id: 0xc50 Vendor Specific Identifier: 0x15ea71bb [0x5000c50015ea71bb] See also: hw/scsi/scsi-disk.c, scsi_disk_emulate_inquiry() Signed-off-by: John Snow Signed-off-by: Stefan Hajnoczi --- hw/ide/atapi.c | 111 +++++++++++++++++++++++++++++++++++++++++----- hw/ide/internal.h | 1 + 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index c63b7e556e..a71e6e014f 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -621,20 +621,107 @@ static void cmd_request_sense(IDEState *s, uint8_t *buf) static void cmd_inquiry(IDEState *s, uint8_t *buf) { + uint8_t page_code = buf[2]; int max_len = buf[4]; - buf[0] = 0x05; /* CD-ROM */ - buf[1] = 0x80; /* removable */ - buf[2] = 0x00; /* ISO */ - buf[3] = 0x21; /* ATAPI-2 (XXX: put ATAPI-4 ?) */ - buf[4] = 31; /* additional length */ - buf[5] = 0; /* reserved */ - buf[6] = 0; /* reserved */ - buf[7] = 0; /* reserved */ - padstr8(buf + 8, 8, "QEMU"); - padstr8(buf + 16, 16, "QEMU DVD-ROM"); - padstr8(buf + 32, 4, s->version); - ide_atapi_cmd_reply(s, 36, max_len); + unsigned idx = 0; + unsigned size_idx; + unsigned preamble_len; + + /* If the EVPD (Enable Vital Product Data) bit is set in byte 1, + * we are being asked for a specific page of info indicated by byte 2. */ + if (buf[1] & 0x01) { + preamble_len = 4; + size_idx = 3; + + buf[idx++] = 0x05; /* CD-ROM */ + buf[idx++] = page_code; /* Page Code */ + buf[idx++] = 0x00; /* reserved */ + idx++; /* length (set later) */ + + switch (page_code) { + case 0x00: + /* Supported Pages: List of supported VPD responses. */ + buf[idx++] = 0x00; /* 0x00: Supported Pages, and: */ + buf[idx++] = 0x83; /* 0x83: Device Identification. */ + break; + + case 0x83: + /* Device Identification. Each entry is optional, but the entries + * included here are modeled after libata's VPD responses. + * If the response is given, at least one entry must be present. */ + + /* Entry 1: Serial */ + if (idx + 24 > max_len) { + /* Not enough room for even the first entry: */ + /* 4 byte header + 20 byte string */ + ide_atapi_cmd_error(s, ILLEGAL_REQUEST, + ASC_DATA_PHASE_ERROR); + return; + } + buf[idx++] = 0x02; /* Ascii */ + buf[idx++] = 0x00; /* Vendor Specific */ + buf[idx++] = 0x00; + buf[idx++] = 20; /* Remaining length */ + padstr8(buf + idx, 20, s->drive_serial_str); + idx += 20; + + /* Entry 2: Drive Model and Serial */ + if (idx + 72 > max_len) { + /* 4 (header) + 8 (vendor) + 60 (model & serial) */ + goto out; + } + buf[idx++] = 0x02; /* Ascii */ + buf[idx++] = 0x01; /* T10 Vendor */ + buf[idx++] = 0x00; + buf[idx++] = 68; + padstr8(buf + idx, 8, "ATA"); /* Generic T10 vendor */ + idx += 8; + padstr8(buf + idx, 40, s->drive_model_str); + idx += 40; + padstr8(buf + idx, 20, s->drive_serial_str); + idx += 20; + + /* Entry 3: WWN */ + if (s->wwn && (idx + 12 <= max_len)) { + /* 4 byte header + 8 byte wwn */ + buf[idx++] = 0x01; /* Binary */ + buf[idx++] = 0x03; /* NAA */ + buf[idx++] = 0x00; + buf[idx++] = 0x08; + stq_be_p(&buf[idx], s->wwn); + idx += 8; + } + break; + + default: + /* SPC-3, revision 23 sec. 6.4 */ + ide_atapi_cmd_error(s, ILLEGAL_REQUEST, + ASC_INV_FIELD_IN_CMD_PACKET); + return; + } + } else { + preamble_len = 5; + size_idx = 4; + + buf[0] = 0x05; /* CD-ROM */ + buf[1] = 0x80; /* removable */ + buf[2] = 0x00; /* ISO */ + buf[3] = 0x21; /* ATAPI-2 (XXX: put ATAPI-4 ?) */ + /* buf[size_idx] set below. */ + buf[5] = 0; /* reserved */ + buf[6] = 0; /* reserved */ + buf[7] = 0; /* reserved */ + padstr8(buf + 8, 8, "QEMU"); + padstr8(buf + 16, 16, "QEMU DVD-ROM"); + padstr8(buf + 32, 4, s->version); + idx = 36; + } + + out: + buf[size_idx] = idx - preamble_len; + ide_atapi_cmd_reply(s, idx, max_len); + return; } static void cmd_get_configuration(IDEState *s, uint8_t *buf) diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 8a3eca40d2..c998003bf3 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -296,6 +296,7 @@ typedef struct IDEDMAOps IDEDMAOps; #define ASC_INCOMPATIBLE_FORMAT 0x30 #define ASC_MEDIUM_NOT_PRESENT 0x3a #define ASC_SAVING_PARAMETERS_NOT_SUPPORTED 0x39 +#define ASC_DATA_PHASE_ERROR 0x4b #define ASC_MEDIA_REMOVAL_PREVENTED 0x53 #define CFA_NO_ERROR 0x00 From e7026f1953c16d1cc3e8310cc27db6acf61d9def Mon Sep 17 00:00:00 2001 From: Alex Friedman Date: Fri, 5 Dec 2014 14:40:24 +0200 Subject: [PATCH 35/38] nvme: Fix get/set number of queues feature According to the specification, the low 16 bits should contain the number of I/O submission queues, and the high 16 bits should contain the number of I/O completion queues. Signed-off-by: Alex Friedman Acked-by: Keith Busch Signed-off-by: Stefan Hajnoczi --- hw/block/nvme.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index aa1ed986d2..4f70f91443 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -476,7 +476,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (dw10) { case NVME_NUMBER_OF_QUEUES: - req->cqe.result = cpu_to_le32(n->num_queues); + req->cqe.result = + cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); break; default: return NVME_INVALID_FIELD | NVME_DNR; @@ -490,7 +491,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (dw10) { case NVME_NUMBER_OF_QUEUES: - req->cqe.result = cpu_to_le32(n->num_queues); + req->cqe.result = + cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); break; default: return NVME_INVALID_FIELD | NVME_DNR; From 5734edd8372cce37c8c203b95beb03acbec72f49 Mon Sep 17 00:00:00 2001 From: Chrysostomos Nanakos Date: Tue, 9 Dec 2014 14:58:22 +0200 Subject: [PATCH 36/38] MAINTAINERS: Update email addresses for Chrysostomos Nanakos Remove first email address and let the one from which I am contributing. Signed-off-by: Chrysostomos Nanakos Signed-off-by: Stefan Hajnoczi --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 29c6834852..cd7b04f30c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1105,7 +1105,6 @@ S: Supported F: block/ssh.c ARCHIPELAGO -M: Chrysostomos Nanakos M: Chrysostomos Nanakos S: Maintained F: block/archipelago.c From 47b0f45a9282aaf73231f34ca60369a12c8d8d3c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Dec 2014 18:49:47 +0800 Subject: [PATCH 37/38] MAINTAINERS: Add migration/block* to block subsystem We are moving block-migration.c to the separated migration directory, keep this file watched by block maintainers is a good idea. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index cd7b04f30c..430688dcab 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -757,6 +757,7 @@ F: aio-*.c F: block* F: block/ F: hw/block/ +F: migration/block* F: qemu-img* F: qemu-io* F: tests/image-fuzzer/ From 07d31d07f4b28a61b65cec95da69851c675f20b9 Mon Sep 17 00:00:00 2001 From: Anubhav Rakshit Date: Thu, 8 Jan 2015 15:10:35 +0530 Subject: [PATCH 38/38] NVMe: Set correct VS Value for 1.1 Compliant Controllers According to NVMe specifications Bits 15:08 represent Minor Version number. Signed-off-by: Anubhav Rakshit Signed-off-by: Stefan Hajnoczi --- hw/block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 4f70f91443..ce079aefdd 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -815,7 +815,7 @@ static int nvme_init(PCIDevice *pci_dev) NVME_CAP_SET_CSS(n->bar.cap, 1); NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - n->bar.vs = 0x00010001; + n->bar.vs = 0x00010100; n->bar.intmc = n->bar.intms = 0; for (i = 0; i < n->num_namespaces; i++) {