From e166b4148208656635ea2fe39df8b1e875a34fb8 Mon Sep 17 00:00:00 2001 From: Bo Tu Date: Fri, 3 Jul 2015 15:28:46 +0800 Subject: [PATCH 01/16] qemu-iotests: qemu machine type support This patch adds qemu machine type support to the io test suite. Based on the qemu default machine type and alias of the default machine type the reference output file can now vary from the default to a machine specific output file if necessary. When using a machine specific reference file if the default machine has an alias then use the alias as the output file name otherwise use the default machine name as the output file name. Reviewed-by: Max Reitz Reviewed-by: Michael Mueller Reviewed-by: Sascha Silbe Signed-off-by: Xiao Guang Chen Signed-off-by: Kevin Wolf --- tests/qemu-iotests/check | 5 +++++ tests/qemu-iotests/common.config | 11 ++++++++++- tests/qemu-iotests/iotests.py | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 1fa63193ba..6d582033e8 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -330,6 +330,11 @@ do fi reference="$source_iotests/$seq.out" + reference_machine="$source_iotests/$seq.$QEMU_DEFAULT_MACHINE.out" + if [ -f "$reference_machine" ]; then + reference="$reference_machine" + fi + if [ "$CACHEMODE" = "none" ]; then [ -f "$source_iotests/$seq.out.nocache" ] && reference="$source_iotests/$seq.out.nocache" fi diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index a1973ad9d0..e0bf896e46 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -103,10 +103,19 @@ if [ -z "$QEMU_NBD_PROG" ]; then export QEMU_NBD_PROG="`set_prog_path qemu-nbd`" fi -export QEMU=$QEMU_PROG +export QEMU="$QEMU_PROG $QEMU_OPTIONS" export QEMU_IMG=$QEMU_IMG_PROG export QEMU_IO="$QEMU_IO_PROG $QEMU_IO_OPTIONS" export QEMU_NBD=$QEMU_NBD_PROG +default_machine=$($QEMU -machine \? | awk '/(default)/{print $1}') +default_alias_machine=$($QEMU -machine \? |\ + awk -v var_default_machine="$default_machine"\)\ + '{if ($(NF-2)=="(alias"&&$(NF-1)=="of"&&$(NF)==var_default_machine){print $1}}') +if [ ! -z "$default_alias_machine" ]; then + default_machine="$default_alias_machine" +fi + +export QEMU_DEFAULT_MACHINE="$default_machine" [ -f /etc/qemu-iotest.config ] && . /etc/qemu-iotest.config diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 8615b10751..557925314b 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -42,6 +42,7 @@ imgproto = os.environ.get('IMGPROTO', 'file') test_dir = os.environ.get('TEST_DIR', '/var/tmp') output_dir = os.environ.get('OUTPUT_DIR', '.') cachemode = os.environ.get('CACHEMODE') +qemu_default_machine = os.environ.get('QEMU_DEFAULT_MACHINE') socket_scm_helper = os.environ.get('SOCKET_SCM_HELPER', 'socket_scm_helper') From 2711fd33a4b18c5e35a6f7efe57b5d868def829e Mon Sep 17 00:00:00 2001 From: Bo Tu Date: Fri, 3 Jul 2015 15:28:47 +0800 Subject: [PATCH 02/16] qemu-iotests: disable default qemu devices for cross-platform compatibility This patch fixes an io test suite issue that was introduced with the commit c88930a6866e74953e931ae749781e98e486e5c8 'qemu-char: Permit only a single "stdio" character device'. The option supresses the creation of default devices such as the floopy and cdrom. Output files for test case 067, 071, 081 and 087 need to be updated to accommodate this change. Use virtio-blk instead of virtio-blk-pci as the device driver for test case 067. For virtio-blk-pci is the same with virtio-blk as device driver but other platform such as s390 may not recognize the virtio-blk-pci. The default devices differ across machines. As the qemu output often contains these devices (or events for them, like opening a CD tray on reset), the reference output currently is rather machine-specific. All existing qemu tests explicitly configure the devices they're working with, so just pass -nodefaults to qemu by default to disable the default devices. Update the reference outputs accordingly. Reviewed-by: Max Reitz Reviewed-by: Michael Mueller Reviewed-by: Sascha Silbe Signed-off-by: Xiao Guang Chen Signed-off-by: Kevin Wolf --- tests/qemu-iotests/067 | 8 +- tests/qemu-iotests/067.out | 266 +-------------------------------- tests/qemu-iotests/071.out | 4 - tests/qemu-iotests/081.out | 2 - tests/qemu-iotests/087.out | 12 -- tests/qemu-iotests/common | 1 + tests/qemu-iotests/common.qemu | 2 +- 7 files changed, 7 insertions(+), 288 deletions(-) diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067 index 83eefa394e..3e9a053ef3 100755 --- a/tests/qemu-iotests/067 +++ b/tests/qemu-iotests/067 @@ -59,7 +59,7 @@ echo echo === -drive/-device and device_del === echo -run_qemu -drive file=$TEST_IMG,format=$IMGFMT,if=none,id=disk -device virtio-blk-pci,drive=disk,id=virtio0 <"${fifo_out}" \ 2>&1 \ <"${fifo_in}" & From d8683155fa76cabff112271771e43e21034ff2ba Mon Sep 17 00:00:00 2001 From: Bo Tu Date: Fri, 3 Jul 2015 15:28:48 +0800 Subject: [PATCH 03/16] qemu-iotests: s390x: fix test 041 and 055 There is no 'ide-cd' device defined on non-pc platform, so test_medium_not_found() test should be skipped. Reviewed-by: Max Reitz Reviewed-by: Michael Mueller Reviewed-by: Sascha Silbe Signed-off-by: Xiao Guang Chen Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 6 ++++++ tests/qemu-iotests/055 | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 3d46ed705b..de8ea1588b 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -167,6 +167,9 @@ class TestSingleDrive(iotests.QMPTestCase): 'target image does not match source after mirroring') def test_medium_not_found(self): + if iotests.qemu_default_machine != 'pc': + return + result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full', target=target_img) self.assert_qmp(result, 'error/class', 'GenericError') @@ -831,6 +834,9 @@ class TestRepairQuorum(iotests.QMPTestCase): if not self.has_quorum(): return + if iotests.qemu_default_machine != 'pc': + return + result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full', node_name='repair0', replaces='img1', diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index 017a609f39..e6e0ac4205 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -104,11 +104,17 @@ class TestSingleDrive(iotests.QMPTestCase): self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img) def test_medium_not_found(self): + if iotests.qemu_default_machine != 'pc': + return + result = self.vm.qmp('drive-backup', device='ide1-cd0', target=target_img, sync='full') self.assert_qmp(result, 'error/class', 'GenericError') def test_medium_not_found_blockdev_backup(self): + if iotests.qemu_default_machine != 'pc': + return + result = self.vm.qmp('blockdev-backup', device='ide1-cd0', target='drive1', sync='full') self.assert_qmp(result, 'error/class', 'GenericError') @@ -323,6 +329,9 @@ class TestSingleTransaction(iotests.QMPTestCase): self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img) def do_test_medium_not_found(self, cmd, target): + if iotests.qemu_default_machine != 'pc': + return + result = self.vm.qmp('transaction', actions=[{ 'type': cmd, 'data': { 'device': 'ide1-cd0', From 212789925efffe1c552b114321ee74081a7efb03 Mon Sep 17 00:00:00 2001 From: Bo Tu Date: Fri, 3 Jul 2015 15:28:49 +0800 Subject: [PATCH 04/16] qemu-iotests: s390x: fix test 049, reject negative sizes in QemuOpts when creating an image qemu-img enable us specifying the size of the image using -o size=xx options. But when we specify an invalid size such as a negtive size then different platform gives different result. parse_option_size() function in util/qemu-option.c will be called to parse the size, a cast was called in the function to cast the input (saved as a double in the function) size to an unsigned int64 value, when the input is a negtive value or exceeds the maximum of uint64, then the result is undefined. According to C99 6.3.1.4, the result of converting a floating point number to an integer that cannot represent the (integer part of) number is undefined. And sure enough the results are different on x86 and s390. C99 Language spec 6.3.1.4 Real floating and integers: the result of this assignment/cast is undefined if the float is not in the open interval (-1, U_MAX+1). Reviewed-by: Max Reitz Reviewed-by: Sascha Silbe Signed-off-by: Bo Tu Signed-off-by: Kevin Wolf --- tests/qemu-iotests/049.out | 10 ++++------ util/qemu-option.c | 5 +++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 9f93666c5b..8884543db9 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -95,17 +95,15 @@ qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024 qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2 -qemu-img: qcow2 doesn't support shrinking images yet -qemu-img: TEST_DIR/t.qcow2: Could not resize image: Operation not supported -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=-1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2 -qemu-img: qcow2 doesn't support shrinking images yet -qemu-img: TEST_DIR/t.qcow2: Could not resize image: Operation not supported -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=-1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for diff --git a/util/qemu-option.c b/util/qemu-option.c index efe9d279c4..efd6f024e7 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -180,6 +180,11 @@ void parse_option_size(const char *name, const char *value, if (value != NULL) { sizef = strtod(value, &postfix); + if (sizef < 0 || sizef > UINT64_MAX) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, + "a non-negative number below 2^64"); + return; + } switch (*postfix) { case 'T': sizef *= 1024; From 137a905fdf43880c077438d57ef2d319569da9eb Mon Sep 17 00:00:00 2001 From: Bo Tu Date: Fri, 3 Jul 2015 15:28:50 +0800 Subject: [PATCH 05/16] qemu-iotests: s390x: fix test 130 The default device id of hard disk on the s390 platform is "virtio0" which differs to the "ide0-hd0" for the x86 platform. Setting id in the drive definition, ie:"qemu -drive id=testdisk", will be the same on all platforms. Reviewed-by: Max Reitz Signed-off-by: Bo Tu Signed-off-by: Kevin Wolf --- tests/qemu-iotests/130 | 8 ++++---- tests/qemu-iotests/130.out | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/qemu-iotests/130 b/tests/qemu-iotests/130 index bc26247e3f..9209992daa 100755 --- a/tests/qemu-iotests/130 +++ b/tests/qemu-iotests/130 @@ -59,8 +59,8 @@ echo # bdrv_make_empty() involves a header update for qcow2 # Test that a backing file isn't written -_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base" -_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)" +_launch_qemu -drive id=testdisk,file="$TEST_IMG",backing.file.filename="$TEST_IMG.base" +_send_qemu_cmd $QEMU_HANDLE "commit testdisk" "(qemu)" _send_qemu_cmd $QEMU_HANDLE '' '(qemu)' _cleanup_qemu _img_info | _filter_img_info @@ -68,8 +68,8 @@ _img_info | _filter_img_info # Make sure that if there was a backing file that was just overridden on the # command line, that backing file is retained, with the right format _make_test_img -F raw -b "$TEST_IMG.orig" 64M -_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base",backing.driver=$IMGFMT -_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)" +_launch_qemu -drive id=testdisk,file="$TEST_IMG",backing.file.filename="$TEST_IMG.base",backing.driver=$IMGFMT +_send_qemu_cmd $QEMU_HANDLE "commit testdisk" "(qemu)" _send_qemu_cmd $QEMU_HANDLE '' '(qemu)' _cleanup_qemu _img_info | _filter_img_info diff --git a/tests/qemu-iotests/130.out b/tests/qemu-iotests/130.out index ea68b5d283..9ec9d2a2bb 100644 --- a/tests/qemu-iotests/130.out +++ b/tests/qemu-iotests/130.out @@ -9,14 +9,14 @@ virtual size: 64M (67108864 bytes) === HMP commit === QEMU X.Y.Z monitor - type 'help' for more information -(qemu) ccocomcommcommicommitcommit commit icommit idcommit idecommit ide0commit ide0-commit ide0-hcommit ide0-hdcommit ide0-hd0 +(qemu) ccocomcommcommicommitcommit commit tcommit tecommit tescommit testcommit testdcommit testdicommit testdiscommit testdisk (qemu) image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 64M (67108864 bytes) Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw' QEMU X.Y.Z monitor - type 'help' for more information -(qemu) ccocomcommcommicommitcommit commit icommit idcommit idecommit ide0commit ide0-commit ide0-hcommit ide0-hdcommit ide0-hd0 +(qemu) ccocomcommcommicommitcommit commit tcommit tecommit tescommit testcommit testdcommit testdicommit testdiscommit testdisk (qemu) image: TEST_DIR/t.IMGFMT file format: IMGFMT From bdd03cdf5dc3176bc7169a1d5709303e9279fffb Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 12 Aug 2015 17:33:31 +0200 Subject: [PATCH 06/16] block/raw-posix: Use raw_normalize_devicepath() The filename given to qemu_open() in block/raw-posix.c should generally have been processed by raw_normalize_devicepath(); unless we are only probing (in which case the caller often checks whether the file is a block device or not, and this property will be changed by raw_normalize_devicepath() on NetBSD) or it is about a deprecated device (i.e. floppy). Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/raw-posix.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index 855febed52..30df8adf7f 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -670,11 +670,17 @@ static int raw_reopen_prepare(BDRVReopenState *state, /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */ if (raw_s->fd == -1) { - assert(!(raw_s->open_flags & O_CREAT)); - raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags); - if (raw_s->fd == -1) { - error_setg_errno(errp, errno, "Could not reopen file"); - ret = -1; + const char *normalized_filename = state->bs->filename; + ret = raw_normalize_devicepath(&normalized_filename); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not normalize device path"); + } else { + assert(!(raw_s->open_flags & O_CREAT)); + raw_s->fd = qemu_open(normalized_filename, raw_s->open_flags); + if (raw_s->fd == -1) { + error_setg_errno(errp, errno, "Could not reopen file"); + ret = -1; + } } } @@ -2314,6 +2320,12 @@ static int hdev_create(const char *filename, QemuOpts *opts, (void)has_prefix; + ret = raw_normalize_devicepath(&filename); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not normalize device path"); + return ret; + } + /* Read out options */ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); From e814dffcc9810ed77fe99081be9751b620a894c4 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 20 Aug 2015 16:00:38 -0700 Subject: [PATCH 07/16] qemu-img: Fix crash in amend invocation Example: $ ./qemu-img create -f qcow2 /tmp/t.qcow2 64M $ ./qemu-img amend -f qcow2 -o backing_file=/tmp/t.qcow2, -o help \ /tmp/t.qcow2 This should not crash. This actually is tested by iotest 082, but not caught due to the segmentation fault being silent (which is something that needs to be fixed, too). Reported-by: Dr. David Alan Gilbert Cc: qemu-stable Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- qemu-img.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qemu-img.c b/qemu-img.c index 75f4ee4421..6ff4e852b1 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2931,7 +2931,7 @@ static int img_amend(int argc, char **argv) if (!is_valid_option_list(optarg)) { error_report("Invalid option list: %s", optarg); ret = -1; - goto out; + goto out_no_progress; } if (!options) { options = g_strdup(optarg); @@ -3031,6 +3031,7 @@ static int img_amend(int argc, char **argv) out: qemu_progress_end(); +out_no_progress: blk_unref(blk); qemu_opts_del(opts); qemu_opts_free(create_opts); From 8e4922535b6479c7a2fa6b14b0148c6ae4fcc003 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 2 Sep 2015 20:52:25 +0200 Subject: [PATCH 08/16] iotests: More options for VM.add_drive() This patch allows specifying the interface to be used for the drive, and makes specifying a path optional (if the path is None, the "file" option will be omitted, thus creating an empty drive). Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/iotests.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 557925314b..1f913a1b2c 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -118,13 +118,16 @@ class VM(object): self._args.append('-monitor') self._args.append(args) - def add_drive(self, path, opts=''): + def add_drive(self, path, opts='', interface='virtio'): '''Add a virtio-blk drive to the VM''' - options = ['if=virtio', + options = ['if=%s' % interface, 'format=%s' % imgfmt, 'cache=%s' % cachemode, - 'file=%s' % path, 'id=drive%d' % self._num_drives] + + if path is not None: + options.append('file=%s' % path) + if opts: options.append(opts) From 0ed82f7a096537923ef3705946f254d2f61eaf93 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 2 Sep 2015 20:52:26 +0200 Subject: [PATCH 09/16] iotests: Respect -nodefaults in tests 41 and 55 While -nodefaults is set in $QEMU_OPTIONS, this is currently (wrongly) ignored for Python iotests. In order to be prepared for when this is fixed, we should explicitly add an IDE CD-ROM drive instead of relying on it being created automatically. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 12 +++++++++--- tests/qemu-iotests/055 | 10 +++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index de8ea1588b..38ca5f13c7 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -42,6 +42,8 @@ class TestSingleDrive(iotests.QMPTestCase): iotests.create_image(backing_img, self.image_len) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) self.vm = iotests.VM().add_drive(test_img) + if iotests.qemu_default_machine == 'pc': + self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() def tearDown(self): @@ -170,8 +172,8 @@ class TestSingleDrive(iotests.QMPTestCase): if iotests.qemu_default_machine != 'pc': return - result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full', - target=target_img) + result = self.vm.qmp('drive-mirror', device='drive1', # CD-ROM + sync='full', target=target_img) self.assert_qmp(result, 'error/class', 'GenericError') def test_image_not_found(self): @@ -710,6 +712,9 @@ class TestRepairQuorum(iotests.QMPTestCase): def setUp(self): self.vm = iotests.VM() + if iotests.qemu_default_machine == 'pc': + self.vm.add_drive(None, 'media=cdrom', 'ide') + # Add each individual quorum images for i in self.IMAGES: qemu_img('create', '-f', iotests.imgfmt, i, @@ -837,7 +842,8 @@ class TestRepairQuorum(iotests.QMPTestCase): if iotests.qemu_default_machine != 'pc': return - result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full', + result = self.vm.qmp('drive-mirror', device='drive0', # CD-ROM + sync='full', node_name='repair0', replaces='img1', target=quorum_repair_img, format=iotests.imgfmt) diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index e6e0ac4205..c8e3578702 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -42,6 +42,8 @@ class TestSingleDrive(iotests.QMPTestCase): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + if iotests.qemu_default_machine == 'pc': + self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() def tearDown(self): @@ -107,7 +109,7 @@ class TestSingleDrive(iotests.QMPTestCase): if iotests.qemu_default_machine != 'pc': return - result = self.vm.qmp('drive-backup', device='ide1-cd0', + result = self.vm.qmp('drive-backup', device='drive2', # CD-ROM target=target_img, sync='full') self.assert_qmp(result, 'error/class', 'GenericError') @@ -115,7 +117,7 @@ class TestSingleDrive(iotests.QMPTestCase): if iotests.qemu_default_machine != 'pc': return - result = self.vm.qmp('blockdev-backup', device='ide1-cd0', + result = self.vm.qmp('blockdev-backup', device='drive2', # CD-ROM target='drive1', sync='full') self.assert_qmp(result, 'error/class', 'GenericError') @@ -255,6 +257,8 @@ class TestSingleTransaction(iotests.QMPTestCase): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + if iotests.qemu_default_machine == 'pc': + self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() def tearDown(self): @@ -334,7 +338,7 @@ class TestSingleTransaction(iotests.QMPTestCase): result = self.vm.qmp('transaction', actions=[{ 'type': cmd, - 'data': { 'device': 'ide1-cd0', + 'data': { 'device': 'drive2', # CD-ROM 'target': target, 'sync': 'full' }, } From 934659c460d46c948cf348822fda1d38556ed9a4 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 2 Sep 2015 20:52:27 +0200 Subject: [PATCH 10/16] iotests: Do not suppress segfaults in bash tests Currently, if a qemu/qemu-io/qemu-img/qemu-nbd invocation receives a segmentation fault, that message is invisible in most cases since the output is generally filtered and bash suppresses the segmentation fault notice for any but the last element of a pipe. Most of the time, the test will then fail anyway because of missing output, but not necessarily (as happened with test 82 recently). Fix this by making the corresponding environment variables point to wrapper functions which execute the respective command in a subshell. Giving options to qemu/qemu-io/qemu-img and path names with spaces were broken for the Python tests; this patch "accidentally" fixes that. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/039 | 19 ++++++------------- tests/qemu-iotests/039.out | 6 +++--- tests/qemu-iotests/061 | 6 ++++-- tests/qemu-iotests/061.out | 2 ++ tests/qemu-iotests/check | 8 ++++---- tests/qemu-iotests/common.config | 29 +++++++++++++++++++++++++---- tests/qemu-iotests/common.rc | 12 +++++++++++- tests/qemu-iotests/iotests.py | 16 ++++++++++++---- 8 files changed, 67 insertions(+), 31 deletions(-) diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index 859705f848..617f3977cc 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -47,13 +47,6 @@ _supported_os Linux _default_cache_mode "writethrough" _supported_cache_modes "writethrough" -_subshell_exec() -{ - # Executing crashing commands in a subshell prevents information like the - # "Killed" line from being lost - (exec "$@") -} - size=128M echo @@ -74,8 +67,8 @@ echo "== Creating a dirty image file ==" IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img $size -_subshell_exec $QEMU_IO -c "write -P 0x5a 0 512" \ - -c "sigraise $(kill -l KILL)" "$TEST_IMG" 2>&1 \ +$QEMU_IO -c "write -P 0x5a 0 512" \ + -c "sigraise $(kill -l KILL)" "$TEST_IMG" 2>&1 \ | _filter_qemu_io # The dirty bit must be set @@ -109,8 +102,8 @@ echo "== Opening a dirty image read/write should repair it ==" IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img $size -_subshell_exec $QEMU_IO -c "write -P 0x5a 0 512" \ - -c "sigraise $(kill -l KILL)" "$TEST_IMG" 2>&1 \ +$QEMU_IO -c "write -P 0x5a 0 512" \ + -c "sigraise $(kill -l KILL)" "$TEST_IMG" 2>&1 \ | _filter_qemu_io # The dirty bit must be set @@ -127,8 +120,8 @@ echo "== Creating an image file with lazy_refcounts=off ==" IMGOPTS="compat=1.1,lazy_refcounts=off" _make_test_img $size -_subshell_exec $QEMU_IO -c "write -P 0x5a 0 512" \ - -c "sigraise $(kill -l KILL)" "$TEST_IMG" 2>&1 \ +$QEMU_IO -c "write -P 0x5a 0 512" \ + -c "sigraise $(kill -l KILL)" "$TEST_IMG" 2>&1 \ | _filter_qemu_io # The dirty bit must not be set since lazy_refcounts=off diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out index d09751f9c8..b055670b8c 100644 --- a/tests/qemu-iotests/039.out +++ b/tests/qemu-iotests/039.out @@ -11,7 +11,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./039: Killed ( exec "$@" ) +./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) incompatible_features 0x1 ERROR cluster 5 refcount=0 reference=1 ERROR OFLAG_COPIED data cluster: l2_entry=8000000000050000 refcount=0 @@ -46,7 +46,7 @@ read 512/512 bytes at offset 0 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./039: Killed ( exec "$@" ) +./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) incompatible_features 0x1 ERROR cluster 5 refcount=0 reference=1 Rebuilding refcount structure @@ -60,7 +60,7 @@ incompatible_features 0x0 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./039: Killed ( exec "$@" ) +./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) incompatible_features 0x0 No errors were found on the image. diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 index 8d37f8a65c..1df887a01d 100755 --- a/tests/qemu-iotests/061 +++ b/tests/qemu-iotests/061 @@ -58,7 +58,8 @@ echo echo "=== Testing dirty version downgrade ===" echo IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M -$QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" 2>&1 \ + | _filter_qemu_io $PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG" $PYTHON qcow2.py "$TEST_IMG" dump-header @@ -91,7 +92,8 @@ echo echo "=== Testing dirty lazy_refcounts=off ===" echo IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M -$QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" 2>&1 \ + | _filter_qemu_io $PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IMG amend -o "lazy_refcounts=off" "$TEST_IMG" $PYTHON qcow2.py "$TEST_IMG" dump-header diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out index 5ec248f79b..4505376433 100644 --- a/tests/qemu-iotests/061.out +++ b/tests/qemu-iotests/061.out @@ -57,6 +57,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +./common.config: Aborted (core dumped) ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) magic 0x514649fb version 3 backing_file_offset 0x0 @@ -214,6 +215,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +./common.config: Aborted (core dumped) ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) magic 0x514649fb version 3 backing_file_offset 0x0 diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 6d582033e8..c350f16b6a 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -231,10 +231,10 @@ FULL_HOST_DETAILS=`_full_platform_details` #FULL_MOUNT_OPTIONS=`_scratch_mount_options` cat < Date: Wed, 2 Sep 2015 20:52:28 +0200 Subject: [PATCH 11/16] iotests: Warn if python subprocess is killed Currently, if a subprocess of a python test (i.e. qemu-io, qemu-img, or qemu) receives a signal and is subsequently aborted, this is not logged. This patch makes python tests always check the exit code of these subprocesses, and emit a message if they have been killed. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/iotests.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 8e3419ff9e..ff5905f379 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -57,20 +57,34 @@ socket_scm_helper = os.environ.get('SOCKET_SCM_HELPER', 'socket_scm_helper') def qemu_img(*args): '''Run qemu-img and return the exit code''' devnull = open('/dev/null', 'r+') - return subprocess.call(qemu_img_args + list(args), stdin=devnull, stdout=devnull) + exitcode = subprocess.call(qemu_img_args + list(args), stdin=devnull, stdout=devnull) + if exitcode < 0: + sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args)))) + return exitcode def qemu_img_verbose(*args): '''Run qemu-img without suppressing its output and return the exit code''' - return subprocess.call(qemu_img_args + list(args)) + exitcode = subprocess.call(qemu_img_args + list(args)) + if exitcode < 0: + sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args)))) + return exitcode def qemu_img_pipe(*args): '''Run qemu-img and return its output''' - return subprocess.Popen(qemu_img_args + list(args), stdout=subprocess.PIPE).communicate()[0] + subp = subprocess.Popen(qemu_img_args + list(args), stdout=subprocess.PIPE) + exitcode = subp.wait() + if exitcode < 0: + sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args)))) + return subp.communicate()[0] def qemu_io(*args): '''Run qemu-io and return the stdout data''' args = qemu_io_args + list(args) - return subprocess.Popen(args, stdout=subprocess.PIPE).communicate()[0] + subp = subprocess.Popen(args, stdout=subprocess.PIPE) + exitcode = subp.wait() + if exitcode < 0: + sys.stderr.write('qemu-io received signal %i: %s\n' % (-exitcode, ' '.join(args))) + return subp.communicate()[0] def compare_images(img1, img2): '''Return True if two image files are identical''' @@ -208,7 +222,9 @@ class VM(object): '''Terminate the VM and clean up''' if not self._popen is None: self._qmp.cmd('quit') - self._popen.wait() + exitcode = self._popen.wait() + if exitcode < 0: + sys.stderr.write('qemu received signal %i: %s\n' % (-exitcode, ' '.join(self._args))) os.remove(self._monitor_path) os.remove(self._qtest_path) os.remove(self._qemu_log_path) From 355ee2d0e8ca536a6278c9c763ddd2f136eace3f Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Tue, 4 Aug 2015 15:14:39 +0300 Subject: [PATCH 12/16] qcow2: mark the memory as no longer needed after qcow2_cache_empty() After having emptied the cache, the data in the cache tables is no longer useful, so we can tell the kernel that we are done with it. In Linux this frees the resources associated with it. The effect of this can be seen in the HMP commit operation: it moves data from the top to the base image (and fills both caches), then it empties the top image. At this point the data in that cache is no longer needed so it's just wasting memory. Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Message-id: 08538b098e1faf6c92496477cf9b47a20e5aacea.1438690126.git.berto@igalia.com Signed-off-by: Max Reitz --- block/qcow2-cache.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 53b8afc3d3..f63e7d8e70 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -22,8 +22,16 @@ * THE SOFTWARE. */ +/* Needed for CONFIG_MADVISE */ +#include "config-host.h" + +#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE) +#include +#endif + #include "block/block_int.h" #include "qemu-common.h" +#include "qemu/osdep.h" #include "qcow2.h" #include "trace.h" @@ -60,6 +68,22 @@ static inline int qcow2_cache_get_table_idx(BlockDriverState *bs, return idx; } +static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, + int i, int num_tables) +{ +#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID + BDRVQcowState *s = bs->opaque; + void *t = qcow2_cache_get_table_addr(bs, c, i); + int align = getpagesize(); + size_t mem_size = (size_t) s->cluster_size * num_tables; + size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t; + size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align); + if (length > 0) { + qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED); + } +#endif +} + Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) { BDRVQcowState *s = bs->opaque; @@ -237,6 +261,8 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) c->entries[i].lru_counter = 0; } + qcow2_cache_table_release(bs, c, 0, c->size); + c->lru_counter = 0; return 0; From 279621c046ce57de0af9e3c00663b48d3a7835ae Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Tue, 4 Aug 2015 15:14:40 +0300 Subject: [PATCH 13/16] qcow2: add option to clean unused cache entries after some time This adds a new 'cache-clean-interval' option that cleans all qcow2 cache entries that haven't been used in a certain interval, given in seconds. This allows setting a large L2 cache size so it can handle scenarios with lots of I/O and at the same time use little memory during periods of inactivity. This feature currently relies on MADV_DONTNEED to free that memory, so it is not useful in systems that don't follow that behavior. Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Message-id: a70d12da60433df9360ada648b3f34b8f6f354ce.1438690126.git.berto@igalia.com Signed-off-by: Max Reitz --- block/qcow2-cache.c | 35 ++++++++++++++++++++++++ block/qcow2.c | 64 ++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 4 +++ qapi/block-core.json | 7 ++++- 4 files changed, 109 insertions(+), 1 deletion(-) diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index f63e7d8e70..8457458418 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -49,6 +49,7 @@ struct Qcow2Cache { bool depends_on_flush; void *table_array; uint64_t lru_counter; + uint64_t cache_clean_lru_counter; }; static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs, @@ -84,6 +85,40 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, #endif } +static inline bool can_clean_entry(Qcow2Cache *c, int i) +{ + Qcow2CachedTable *t = &c->entries[i]; + return t->ref == 0 && !t->dirty && t->offset != 0 && + t->lru_counter <= c->cache_clean_lru_counter; +} + +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c) +{ + int i = 0; + while (i < c->size) { + int to_clean = 0; + + /* Skip the entries that we don't need to clean */ + while (i < c->size && !can_clean_entry(c, i)) { + i++; + } + + /* And count how many we can clean in a row */ + while (i < c->size && can_clean_entry(c, i)) { + c->entries[i].offset = 0; + c->entries[i].lru_counter = 0; + i++; + to_clean++; + } + + if (to_clean > 0) { + qcow2_cache_table_release(bs, c, i - to_clean, to_clean); + } + } + + c->cache_clean_lru_counter = c->lru_counter; +} + Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) { BDRVQcowState *s = bs->opaque; diff --git a/block/qcow2.c b/block/qcow2.c index 76c331b387..ea34ae2da5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -467,6 +467,11 @@ static QemuOptsList qcow2_runtime_opts = { .type = QEMU_OPT_SIZE, .help = "Maximum refcount block cache size", }, + { + .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, + .type = QEMU_OPT_NUMBER, + .help = "Clean unused cache entries after this time (in seconds)", + }, { /* end of list */ } }, }; @@ -482,6 +487,49 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, }; +static void cache_clean_timer_cb(void *opaque) +{ + BlockDriverState *bs = opaque; + BDRVQcowState *s = bs->opaque; + qcow2_cache_clean_unused(bs, s->l2_table_cache); + qcow2_cache_clean_unused(bs, s->refcount_block_cache); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + (int64_t) s->cache_clean_interval * 1000); +} + +static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) +{ + BDRVQcowState *s = bs->opaque; + if (s->cache_clean_interval > 0) { + s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, + SCALE_MS, cache_clean_timer_cb, + bs); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + (int64_t) s->cache_clean_interval * 1000); + } +} + +static void cache_clean_timer_del(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + if (s->cache_clean_timer) { + timer_del(s->cache_clean_timer); + timer_free(s->cache_clean_timer); + s->cache_clean_timer = NULL; + } +} + +static void qcow2_detach_aio_context(BlockDriverState *bs) +{ + cache_clean_timer_del(bs); +} + +static void qcow2_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + cache_clean_timer_init(bs, new_context); +} + static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, uint64_t *l2_cache_size, uint64_t *refcount_cache_size, Error **errp) @@ -555,6 +603,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, const char *opt_overlap_check, *opt_overlap_check_template; int overlap_check_template = 0; uint64_t l2_cache_size, refcount_cache_size; + uint64_t cache_clean_interval; ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); if (ret < 0) { @@ -848,6 +897,16 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + cache_clean_interval = + qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 0); + if (cache_clean_interval > UINT_MAX) { + error_setg(errp, "Cache clean interval too big"); + ret = -EINVAL; + goto fail; + } + s->cache_clean_interval = cache_clean_interval; + cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); + s->cluster_cache = g_malloc(s->cluster_size); /* one more sector for decompressed data alignment */ s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS @@ -1013,6 +1072,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, qemu_vfree(s->l1_table); /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; + cache_clean_timer_del(bs); if (s->l2_table_cache) { qcow2_cache_destroy(bs, s->l2_table_cache); } @@ -1471,6 +1531,7 @@ static void qcow2_close(BlockDriverState *bs) } } + cache_clean_timer_del(bs); qcow2_cache_destroy(bs, s->l2_table_cache); qcow2_cache_destroy(bs, s->refcount_block_cache); @@ -2977,6 +3038,9 @@ BlockDriver bdrv_qcow2 = { .create_opts = &qcow2_create_opts, .bdrv_check = qcow2_check, .bdrv_amend_options = qcow2_amend_options, + + .bdrv_detach_aio_context = qcow2_detach_aio_context, + .bdrv_attach_aio_context = qcow2_attach_aio_context, }; static void bdrv_qcow2_init(void) diff --git a/block/qcow2.h b/block/qcow2.h index 72e132838a..71dafd6dc9 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -96,6 +96,7 @@ #define QCOW2_OPT_CACHE_SIZE "cache-size" #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size" #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size" +#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval" typedef struct QCowHeader { uint32_t magic; @@ -239,6 +240,8 @@ typedef struct BDRVQcowState { Qcow2Cache* l2_table_cache; Qcow2Cache* refcount_block_cache; + QEMUTimer *cache_clean_timer; + unsigned cache_clean_interval; uint8_t *cluster_cache; uint8_t *cluster_data; @@ -581,6 +584,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, Qcow2Cache *dependency); void qcow2_cache_depends_on_flush(Qcow2Cache *c); +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, diff --git a/qapi/block-core.json b/qapi/block-core.json index 7b2efb8678..bb2189ef3a 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1592,6 +1592,10 @@ # @refcount-cache-size: #optional the maximum size of the refcount block cache # in bytes (since 2.2) # +# @cache-clean-interval: #optional clean unused entries in the L2 and refcount +# caches. The interval is in seconds. The default value +# is 0 and it disables this feature (since 2.5) +# # Since: 1.7 ## { 'struct': 'BlockdevOptionsQcow2', @@ -1603,7 +1607,8 @@ '*overlap-check': 'Qcow2OverlapChecks', '*cache-size': 'int', '*l2-cache-size': 'int', - '*refcount-cache-size': 'int' } } + '*refcount-cache-size': 'int', + '*cache-clean-interval': 'int' } } ## From 7f65ce834accce0b7e4bc79313bacf229b957783 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Tue, 4 Aug 2015 15:14:41 +0300 Subject: [PATCH 14/16] docs: document how to configure the qcow2 L2/refcount caches QEMU has options to configure the size of the L2 and refcount caches for the qcow2 format. However, choosing the right sizes for a particular disk image is not a straightforward operation since the ratio between the cache size and the allocated disk space is not obvious and depends on the size of the cluster and the refcount entries. This document attempts to give an overview of both caches and how to configure their sizes. Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Message-id: 55de928e139b1ba3f3d40fe9c6c88f30b1f36410.1438690126.git.berto@igalia.com Signed-off-by: Max Reitz --- docs/qcow2-cache.txt | 164 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 docs/qcow2-cache.txt diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt new file mode 100644 index 0000000000..5bb06072d3 --- /dev/null +++ b/docs/qcow2-cache.txt @@ -0,0 +1,164 @@ +qcow2 L2/refcount cache configuration +===================================== +Copyright (C) 2015 Igalia, S.L. +Author: Alberto Garcia + +This work is licensed under the terms of the GNU GPL, version 2 or +later. See the COPYING file in the top-level directory. + +Introduction +------------ +The QEMU qcow2 driver has two caches that can improve the I/O +performance significantly. However, setting the right cache sizes is +not a straightforward operation. + +This document attempts to give an overview of the L2 and refcount +caches, and how to configure them. + +Please refer to the docs/specs/qcow2.txt file for an in-depth +technical description of the qcow2 file format. + + +Clusters +-------- +A qcow2 file is organized in units of constant size called clusters. + +The cluster size is configurable, but it must be a power of two and +its value 512 bytes or higher. QEMU currently defaults to 64 KB +clusters, and it does not support sizes larger than 2MB. + +The 'qemu-img create' command supports specifying the size using the +cluster_size option: + + qemu-img create -f qcow2 -o cluster_size=128K hd.qcow2 4G + + +The L2 tables +------------- +The qcow2 format uses a two-level structure to map the virtual disk as +seen by the guest to the disk image in the host. These structures are +called the L1 and L2 tables. + +There is one single L1 table per disk image. The table is small and is +always kept in memory. + +There can be many L2 tables, depending on how much space has been +allocated in the image. Each table is one cluster in size. In order to +read or write data from the virtual disk, QEMU needs to read its +corresponding L2 table to find out where that data is located. Since +reading the table for each I/O operation can be expensive, QEMU keeps +an L2 cache in memory to speed up disk access. + +The size of the L2 cache can be configured, and setting the right +value can improve the I/O performance significantly. + + +The refcount blocks +------------------- +The qcow2 format also mantains a reference count for each cluster. +Reference counts are used for cluster allocation and internal +snapshots. The data is stored in a two-level structure similar to the +L1/L2 tables described above. + +The second level structures are called refcount blocks, are also one +cluster in size and the number is also variable and dependent on the +amount of allocated space. + +Each block contains a number of refcount entries. Their size (in bits) +is a power of two and must not be higher than 64. It defaults to 16 +bits, but a different value can be set using the refcount_bits option: + + qemu-img create -f qcow2 -o refcount_bits=8 hd.qcow2 4G + +QEMU keeps a refcount cache to speed up I/O much like the +aforementioned L2 cache, and its size can also be configured. + + +Choosing the right cache sizes +------------------------------ +In order to choose the cache sizes we need to know how they relate to +the amount of allocated space. + +The amount of virtual disk that can be mapped by the L2 and refcount +caches (in bytes) is: + + disk_size = l2_cache_size * cluster_size / 8 + disk_size = refcount_cache_size * cluster_size * 8 / refcount_bits + +With the default values for cluster_size (64KB) and refcount_bits +(16), that is + + disk_size = l2_cache_size * 8192 + disk_size = refcount_cache_size * 32768 + +So in order to cover n GB of disk space with the default values we +need: + + l2_cache_size = disk_size_GB * 131072 + refcount_cache_size = disk_size_GB * 32768 + +QEMU has a default L2 cache of 1MB (1048576 bytes) and a refcount +cache of 256KB (262144 bytes), so using the formulas we've just seen +we have + + 1048576 / 131072 = 8 GB of virtual disk covered by that cache + 262144 / 32768 = 8 GB + + +How to configure the cache sizes +-------------------------------- +Cache sizes can be configured using the -drive option in the +command-line, or the 'blockdev-add' QMP command. + +There are three options available, and all of them take bytes: + +"l2-cache-size": maximum size of the L2 table cache +"refcount-cache-size": maximum size of the refcount block cache +"cache-size": maximum size of both caches combined + +There are two things that need to be taken into account: + + - Both caches must have a size that is a multiple of the cluster + size. + + - If you only set one of the options above, QEMU will automatically + adjust the others so that the L2 cache is 4 times bigger than the + refcount cache. + +This means that these options are equivalent: + + -drive file=hd.qcow2,l2-cache-size=2097152 + -drive file=hd.qcow2,refcount-cache-size=524288 + -drive file=hd.qcow2,cache-size=2621440 + +The reason for this 1/4 ratio is to ensure that both caches cover the +same amount of disk space. Note however that this is only valid with +the default value of refcount_bits (16). If you are using a different +value you might want to calculate both cache sizes yourself since QEMU +will always use the same 1/4 ratio. + +It's also worth mentioning that there's no strict need for both caches +to cover the same amount of disk space. The refcount cache is used +much less often than the L2 cache, so it's perfectly reasonable to +keep it small. + + +Reducing the memory usage +------------------------- +It is possible to clean unused cache entries in order to reduce the +memory usage during periods of low I/O activity. + +The parameter "cache-clean-interval" defines an interval (in seconds). +All cache entries that haven't been accessed during that interval are +removed from memory. + +This example removes all unused cache entries every 15 minutes: + + -drive file=hd.qcow2,cache-clean-interval=900 + +If unset, the default value for this parameter is 0 and it disables +this feature. + +Note that this functionality currently relies on the MADV_DONTNEED +argument for madvise() to actually free the memory, so it is not +useful in systems that don't follow that behavior. From 909c260c71d1bee7018e17034580ffd0743508db Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Tue, 4 Aug 2015 15:14:42 +0300 Subject: [PATCH 15/16] qcow2: reorder fields in Qcow2CachedTable to reduce padding Changing the current ordering saves 8 bytes per cache entry in x86_64. Signed-off-by: Alberto Garcia Reviewed-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Message-id: 0bd55291211df3dfb514d0e7d2031dd5c4f9f807.1438690126.git.berto@igalia.com Signed-off-by: Max Reitz --- block/qcow2-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 8457458418..046f5b8e48 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -37,9 +37,9 @@ typedef struct Qcow2CachedTable { int64_t offset; - bool dirty; uint64_t lru_counter; int ref; + bool dirty; } Qcow2CachedTable; struct Qcow2Cache { From 834cb2ada5db197a11c99142d50222945d196fc0 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 3 Jul 2015 14:45:06 +0800 Subject: [PATCH 16/16] quorum: validate vote threshold against num_children even if read-pattern is fifo We need to use threshold to check if too many write operation fails. If threshold is larger than num children, we always get write error event even if all write operations success. Signed-off-by: Wen Congyang Message-id: 55962F72.3060003@cn.fujitsu.com Reviewed-by: Alberto Garcia Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/quorum.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 2f6c45f760..8fe53b4272 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -889,6 +889,12 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, } s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0); + /* and validate it against s->num_children */ + ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err); + if (ret < 0) { + goto exit; + } + ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN)); if (ret < 0) { error_setg(&local_err, "Please set read-pattern as fifo or quorum"); @@ -897,12 +903,6 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, s->read_pattern = ret; if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { - /* and validate it against s->num_children */ - ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err); - if (ret < 0) { - goto exit; - } - /* is the driver in blkverify mode */ if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) && s->num_children == 2 && s->threshold == 2) {