From 79ef0cebb5694411e7452f0cf15c4bd170c7f2d6 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 7 Apr 2022 16:27:20 +0300 Subject: [PATCH 1/9] block/copy-before-write: refactor option parsing We are going to add one more option of enum type. Let's refactor option parsing so that we can simply work with BlockdevOptionsCbw object. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/copy-before-write.c | 56 ++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/block/copy-before-write.c b/block/copy-before-write.c index a8a06fdc09..e29c46cd7a 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -24,6 +24,7 @@ */ #include "qemu/osdep.h" +#include "qapi/qmp/qjson.h" #include "sysemu/block-backend.h" #include "qemu/cutils.h" @@ -328,46 +329,34 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c, } } -static bool cbw_parse_bitmap_option(QDict *options, BdrvDirtyBitmap **bitmap, - Error **errp) +static BlockdevOptions *cbw_parse_options(QDict *options, Error **errp) { - QDict *bitmap_qdict = NULL; - BlockDirtyBitmap *bmp_param = NULL; + BlockdevOptions *opts = NULL; Visitor *v = NULL; - bool ret = false; - *bitmap = NULL; + qdict_put_str(options, "driver", "copy-before-write"); - qdict_extract_subqdict(options, &bitmap_qdict, "bitmap."); - if (!qdict_size(bitmap_qdict)) { - ret = true; - goto out; - } - - v = qobject_input_visitor_new_flat_confused(bitmap_qdict, errp); + v = qobject_input_visitor_new_flat_confused(options, errp); if (!v) { goto out; } - visit_type_BlockDirtyBitmap(v, NULL, &bmp_param, errp); - if (!bmp_param) { + visit_type_BlockdevOptions(v, NULL, &opts, errp); + if (!opts) { goto out; } - *bitmap = block_dirty_bitmap_lookup(bmp_param->node, bmp_param->name, NULL, - errp); - if (!*bitmap) { - goto out; - } - - ret = true; + /* + * Delete options which we are going to parse through BlockdevOptions + * object for original options. + */ + qdict_extract_subqdict(options, NULL, "bitmap"); out: - qapi_free_BlockDirtyBitmap(bmp_param); visit_free(v); - qobject_unref(bitmap_qdict); + qdict_del(options, "driver"); - return ret; + return opts; } static int cbw_open(BlockDriverState *bs, QDict *options, int flags, @@ -376,6 +365,15 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, BDRVCopyBeforeWriteState *s = bs->opaque; BdrvDirtyBitmap *bitmap = NULL; int64_t cluster_size; + g_autoptr(BlockdevOptions) full_opts = NULL; + BlockdevOptionsCbw *opts; + + full_opts = cbw_parse_options(options, errp); + if (!full_opts) { + return -EINVAL; + } + assert(full_opts->driver == BLOCKDEV_DRIVER_COPY_BEFORE_WRITE); + opts = &full_opts->u.copy_before_write; bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, @@ -390,8 +388,12 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - if (!cbw_parse_bitmap_option(options, &bitmap, errp)) { - return -EINVAL; + if (opts->has_bitmap) { + bitmap = block_dirty_bitmap_lookup(opts->bitmap->node, + opts->bitmap->name, NULL, errp); + if (!bitmap) { + return -EINVAL; + } } bs->total_sectors = bs->file->bs->total_sectors; From f1bb39a8a5b6d486faa1a51a7f28c577155642c9 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 7 Apr 2022 16:27:21 +0300 Subject: [PATCH 2/9] block/copy-before-write: add on-cbw-error open parameter Currently, behavior on copy-before-write operation failure is simple: report error to the guest. Let's implement alternative behavior: break the whole copy-before-write process (and corresponding backup job or NBD client) but keep guest working. It's needed if we consider guest stability as more important. The realisation is simple: on copy-before-write failure we set s->snapshot_ret and continue guest operations. s->snapshot_ret being set will lead to all further snapshot API requests. Note that all in-flight snapshot-API requests may still success: we do wait for them on BREAK_SNAPSHOT-failure path in cbw_do_copy_before_write(). Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/copy-before-write.c | 32 ++++++++++++++++++++++++++++++-- qapi/block-core.json | 25 ++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/block/copy-before-write.c b/block/copy-before-write.c index e29c46cd7a..c8a11a09d2 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -41,6 +41,7 @@ typedef struct BDRVCopyBeforeWriteState { BlockCopyState *bcs; BdrvChild *target; + OnCbwError on_cbw_error; /* * @lock: protects access to @access_bitmap, @done_bitmap and @@ -65,6 +66,14 @@ typedef struct BDRVCopyBeforeWriteState { * node. These areas must not be rewritten by guest. */ BlockReqList frozen_read_reqs; + + /* + * @snapshot_error is normally zero. But on first copy-before-write failure + * when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes + * value of this error (<0). After that all in-flight and further + * snapshot-API requests will fail with that error. + */ + int snapshot_error; } BDRVCopyBeforeWriteState; static coroutine_fn int cbw_co_preadv( @@ -95,16 +104,27 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, return 0; } + if (s->snapshot_error) { + return 0; + } + off = QEMU_ALIGN_DOWN(offset, cluster_size); end = QEMU_ALIGN_UP(offset + bytes, cluster_size); ret = block_copy(s->bcs, off, end - off, true); - if (ret < 0) { + if (ret < 0 && s->on_cbw_error == ON_CBW_ERROR_BREAK_GUEST_WRITE) { return ret; } WITH_QEMU_LOCK_GUARD(&s->lock) { - bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off); + if (ret < 0) { + assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT); + if (!s->snapshot_error) { + s->snapshot_error = ret; + } + } else { + bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off); + } reqlist_wait_all(&s->frozen_read_reqs, off, end - off, &s->lock); } @@ -176,6 +196,11 @@ static BlockReq *cbw_snapshot_read_lock(BlockDriverState *bs, QEMU_LOCK_GUARD(&s->lock); + if (s->snapshot_error) { + g_free(req); + return NULL; + } + if (bdrv_dirty_bitmap_next_zero(s->access_bitmap, offset, bytes) != -1) { g_free(req); return NULL; @@ -351,6 +376,7 @@ static BlockdevOptions *cbw_parse_options(QDict *options, Error **errp) * object for original options. */ qdict_extract_subqdict(options, NULL, "bitmap"); + qdict_del(options, "on-cbw-error"); out: visit_free(v); @@ -395,6 +421,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } } + s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error : + ON_CBW_ERROR_BREAK_GUEST_WRITE; bs->total_sectors = bs->file->bs->total_sectors; bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | diff --git a/qapi/block-core.json b/qapi/block-core.json index 457df16638..3e8456a94a 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -4184,6 +4184,25 @@ 'base': 'BlockdevOptionsGenericFormat', 'data': { '*bottom': 'str' } } +## +# @OnCbwError: +# +# An enumeration of possible behaviors for copy-before-write operation +# failures. +# +# @break-guest-write: report the error to the guest. This way, the guest +# will not be able to overwrite areas that cannot be +# backed up, so the backup process remains valid. +# +# @break-snapshot: continue guest write. Doing so will make the provided +# snapshot state invalid and any backup or export +# process based on it will finally fail. +# +# Since: 7.1 +## +{ 'enum': 'OnCbwError', + 'data': [ 'break-guest-write', 'break-snapshot' ] } + ## # @BlockdevOptionsCbw: # @@ -4205,11 +4224,15 @@ # modifications (or removing) of specified bitmap doesn't # influence the filter. (Since 7.0) # +# @on-cbw-error: Behavior on failure of copy-before-write operation. +# Default is @break-guest-write. (Since 7.1) +# # Since: 6.2 ## { 'struct': 'BlockdevOptionsCbw', 'base': 'BlockdevOptionsGenericFormat', - 'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap' } } + 'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap', + '*on-cbw-error': 'OnCbwError' } } ## # @BlockdevOptions: From dd3e97dfbe199fa277869d127884071100a426e5 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 7 Apr 2022 16:27:22 +0300 Subject: [PATCH 3/9] iotests: add copy-before-write: on-cbw-error tests Add tests for new option of copy-before-write filter: on-cbw-error. Note that we use QEMUMachine instead of VM class, because in further commit we'll want to use throttling which doesn't work with -accel qtest used by VM. We also touch pylintrc to not break iotest 297. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz [vsementsov: add arguments to QEMUMachine constructor] Signed-off-by: Vladimir Sementsov-Ogievskiy --- tests/qemu-iotests/pylintrc | 5 + tests/qemu-iotests/tests/copy-before-write | 135 ++++++++++++++++++ .../qemu-iotests/tests/copy-before-write.out | 5 + 3 files changed, 145 insertions(+) create mode 100755 tests/qemu-iotests/tests/copy-before-write create mode 100644 tests/qemu-iotests/tests/copy-before-write.out diff --git a/tests/qemu-iotests/pylintrc b/tests/qemu-iotests/pylintrc index 32ab77b8bb..f4f823a991 100644 --- a/tests/qemu-iotests/pylintrc +++ b/tests/qemu-iotests/pylintrc @@ -51,3 +51,8 @@ notes=FIXME, # Maximum number of characters on a single line. max-line-length=79 + + +[SIMILARITIES] + +min-similarity-lines=6 diff --git a/tests/qemu-iotests/tests/copy-before-write b/tests/qemu-iotests/tests/copy-before-write new file mode 100755 index 0000000000..e6559de279 --- /dev/null +++ b/tests/qemu-iotests/tests/copy-before-write @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +# group: auto backup +# +# Copyright (c) 2022 Virtuozzo International GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import os +import re + +from qemu.machine import QEMUMachine + +import iotests +from iotests import qemu_img_create, qemu_io + + +temp_img = os.path.join(iotests.test_dir, 'temp') +source_img = os.path.join(iotests.test_dir, 'source') +size = '1M' + + +class TestCbwError(iotests.QMPTestCase): + def tearDown(self): + self.vm.shutdown() + os.remove(temp_img) + os.remove(source_img) + + def setUp(self): + qemu_img_create('-f', iotests.imgfmt, source_img, size) + qemu_img_create('-f', iotests.imgfmt, temp_img, size) + qemu_io('-c', 'write 0 1M', source_img) + + opts = ['-nodefaults', '-display', 'none', '-machine', 'none'] + self.vm = QEMUMachine(iotests.qemu_prog, opts, + base_temp_dir=iotests.test_dir, + sock_dir=iotests.sock_dir) + self.vm.launch() + + def do_cbw_error(self, on_cbw_error): + result = self.vm.qmp('blockdev-add', { + 'node-name': 'cbw', + 'driver': 'copy-before-write', + 'on-cbw-error': on_cbw_error, + 'file': { + 'driver': iotests.imgfmt, + 'file': { + 'driver': 'file', + 'filename': source_img, + } + }, + 'target': { + 'driver': iotests.imgfmt, + 'file': { + 'driver': 'blkdebug', + 'image': { + 'driver': 'file', + 'filename': temp_img + }, + 'inject-error': [ + { + 'event': 'write_aio', + 'errno': 5, + 'immediately': False, + 'once': True + } + ] + } + } + }) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('blockdev-add', { + 'node-name': 'access', + 'driver': 'snapshot-access', + 'file': 'cbw' + }) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('human-monitor-command', + command_line='qemu-io cbw "write 0 1M"') + self.assert_qmp(result, 'return', '') + + result = self.vm.qmp('human-monitor-command', + command_line='qemu-io access "read 0 1M"') + self.assert_qmp(result, 'return', '') + + self.vm.shutdown() + log = self.vm.get_log() + log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log) + log = re.sub(r'\[I \+\d+\.\d+\] CLOSED\n?$', '', log) + log = iotests.filter_qemu_io(log) + return log + + def test_break_snapshot_on_cbw_error(self): + """break-snapshot behavior: + Guest write succeed, but further snapshot-read fails, as snapshot is + broken. + """ + log = self.do_cbw_error('break-snapshot') + + self.assertEqual(log, """\ +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read failed: Permission denied +""") + + def test_break_guest_write_on_cbw_error(self): + """break-guest-write behavior: + Guest write fails, but snapshot-access continues working and further + snapshot-read succeeds. + """ + log = self.do_cbw_error('break-guest-write') + + self.assertEqual(log, """\ +write failed: Input/output error +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +""") + + +if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2'], + supported_protocols=['file']) diff --git a/tests/qemu-iotests/tests/copy-before-write.out b/tests/qemu-iotests/tests/copy-before-write.out new file mode 100644 index 0000000000..fbc63e62f8 --- /dev/null +++ b/tests/qemu-iotests/tests/copy-before-write.out @@ -0,0 +1,5 @@ +.. +---------------------------------------------------------------------- +Ran 2 tests + +OK From e1878eb5f0d93a67deb46aaeea898cf4824a759a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 7 Apr 2022 16:27:23 +0300 Subject: [PATCH 4/9] util: add qemu-co-timeout Add new API, to make a time limited call of the coroutine. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Signed-off-by: Vladimir Sementsov-Ogievskiy --- include/qemu/coroutine.h | 13 ++++++ util/meson.build | 1 + util/qemu-co-timeout.c | 89 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 util/qemu-co-timeout.c diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h index d1548d5b11..08c5bb3c76 100644 --- a/include/qemu/coroutine.h +++ b/include/qemu/coroutine.h @@ -331,6 +331,19 @@ static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) qemu_co_sleep_ns_wakeable(&w, type, ns); } +typedef void CleanupFunc(void *opaque); +/** + * Run entry in a coroutine and start timer. Wait for entry to finish or for + * timer to elapse, what happen first. If entry finished, return 0, if timer + * elapsed earlier, return -ETIMEDOUT. + * + * Be careful, entry execution is not canceled, user should handle it somehow. + * If @clean is provided, it's called after coroutine finish if timeout + * happened. + */ +int coroutine_fn qemu_co_timeout(CoroutineEntry *entry, void *opaque, + uint64_t timeout_ns, CleanupFunc clean); + /** * Wake a coroutine if it is sleeping in qemu_co_sleep_ns. The timer will be * deleted. @sleep_state must be the variable whose address was given to diff --git a/util/meson.build b/util/meson.build index 4939b0b91c..8cce8f8968 100644 --- a/util/meson.build +++ b/util/meson.build @@ -85,6 +85,7 @@ if have_block util_ss.add(files('block-helpers.c')) util_ss.add(files('qemu-coroutine-sleep.c')) util_ss.add(files('qemu-co-shared-resource.c')) + util_ss.add(files('qemu-co-timeout.c')) util_ss.add(files('thread-pool.c', 'qemu-timer.c')) util_ss.add(files('readline.c')) util_ss.add(files('throttle.c')) diff --git a/util/qemu-co-timeout.c b/util/qemu-co-timeout.c new file mode 100644 index 0000000000..00cd335649 --- /dev/null +++ b/util/qemu-co-timeout.c @@ -0,0 +1,89 @@ +/* + * Helper functionality for distributing a fixed total amount of + * an abstract resource among multiple coroutines. + * + * Copyright (c) 2022 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/coroutine.h" +#include "block/aio.h" + +typedef struct QemuCoTimeoutState { + CoroutineEntry *entry; + void *opaque; + QemuCoSleep sleep_state; + bool marker; + CleanupFunc *clean; +} QemuCoTimeoutState; + +static void coroutine_fn qemu_co_timeout_entry(void *opaque) +{ + QemuCoTimeoutState *s = opaque; + + s->entry(s->opaque); + + if (s->marker) { + assert(!s->sleep_state.to_wake); + /* .marker set by qemu_co_timeout, it have been failed */ + if (s->clean) { + s->clean(s->opaque); + } + g_free(s); + } else { + s->marker = true; + qemu_co_sleep_wake(&s->sleep_state); + } +} + +int coroutine_fn qemu_co_timeout(CoroutineEntry *entry, void *opaque, + uint64_t timeout_ns, CleanupFunc clean) +{ + QemuCoTimeoutState *s; + Coroutine *co; + + if (timeout_ns == 0) { + entry(opaque); + return 0; + } + + s = g_new(QemuCoTimeoutState, 1); + *s = (QemuCoTimeoutState) { + .entry = entry, + .opaque = opaque, + .clean = clean + }; + + co = qemu_coroutine_create(qemu_co_timeout_entry, s); + + aio_co_enter(qemu_get_current_aio_context(), co); + qemu_co_sleep_ns_wakeable(&s->sleep_state, QEMU_CLOCK_REALTIME, timeout_ns); + + if (s->marker) { + /* .marker set by qemu_co_timeout_entry, success */ + g_free(s); + return 0; + } + + /* Don't free s, as we can't cancel qemu_co_timeout_entry execution */ + s->marker = true; + return -ETIMEDOUT; +} From 15df6e698719505570f8532772c2b08cb278a45a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 7 Apr 2022 16:27:24 +0300 Subject: [PATCH 5/9] block/block-copy: block_copy(): add timeout_ns parameter Add possibility to limit block_copy() call in time. To be used in the next commit. As timed-out block_copy() call will continue in background anyway (we can't immediately cancel IO operation), it's important also give user a possibility to pass a callback, to do some additional actions on block-copy call finish. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/block-copy.c | 35 +++++++++++++++++++++++++++-------- block/copy-before-write.c | 2 +- include/block/block-copy.h | 4 +++- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/block/block-copy.c b/block/block-copy.c index ec46775ea5..bb947afdda 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -883,23 +883,42 @@ static int coroutine_fn block_copy_common(BlockCopyCallState *call_state) return ret; } -int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes, - bool ignore_ratelimit) +static void coroutine_fn block_copy_async_co_entry(void *opaque) { - BlockCopyCallState call_state = { + block_copy_common(opaque); +} + +int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes, + bool ignore_ratelimit, uint64_t timeout_ns, + BlockCopyAsyncCallbackFunc cb, + void *cb_opaque) +{ + int ret; + BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1); + + *call_state = (BlockCopyCallState) { .s = s, .offset = start, .bytes = bytes, .ignore_ratelimit = ignore_ratelimit, .max_workers = BLOCK_COPY_MAX_WORKERS, + .cb = cb, + .cb_opaque = cb_opaque, }; - return block_copy_common(&call_state); -} + ret = qemu_co_timeout(block_copy_async_co_entry, call_state, timeout_ns, + g_free); + if (ret < 0) { + assert(ret == -ETIMEDOUT); + block_copy_call_cancel(call_state); + /* call_state will be freed by running coroutine. */ + return ret; + } -static void coroutine_fn block_copy_async_co_entry(void *opaque) -{ - block_copy_common(opaque); + ret = call_state->ret; + g_free(call_state); + + return ret; } BlockCopyCallState *block_copy_async(BlockCopyState *s, diff --git a/block/copy-before-write.c b/block/copy-before-write.c index c8a11a09d2..fc13c7cd44 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -111,7 +111,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, off = QEMU_ALIGN_DOWN(offset, cluster_size); end = QEMU_ALIGN_UP(offset + bytes, cluster_size); - ret = block_copy(s->bcs, off, end - off, true); + ret = block_copy(s->bcs, off, end - off, true, 0, NULL, NULL); if (ret < 0 && s->on_cbw_error == ON_CBW_ERROR_BREAK_GUEST_WRITE) { return ret; } diff --git a/include/block/block-copy.h b/include/block/block-copy.h index 68bbd344b2..ba0b425d78 100644 --- a/include/block/block-copy.h +++ b/include/block/block-copy.h @@ -40,7 +40,9 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s, int64_t offset, int64_t *count); int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes, - bool ignore_ratelimit); + bool ignore_ratelimit, uint64_t timeout_ns, + BlockCopyAsyncCallbackFunc cb, + void *cb_opaque); /* * Run block-copy in a coroutine, create corresponding BlockCopyCallState From 6db7fd1ca980f8dd2fd082f13613166e170afd05 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 7 Apr 2022 16:27:25 +0300 Subject: [PATCH 6/9] block/copy-before-write: implement cbw-timeout option In some scenarios, when copy-before-write operations lasts too long time, it's better to cancel it. Most useful would be to use the new option together with on-cbw-error=break-snapshot: this way if cbw operation takes too long time we'll just cancel backup process but do not disturb the guest too much. Note the tricky point of realization: we keep additional point in bs->in_flight during block_copy operation even if it's timed-out. Background "cancelled" block_copy operations will finish at some point and will want to access state. We should care to not free the state in .bdrv_close() earlier. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz [vsementsov: use bdrv_inc_in_flight()/bdrv_dec_in_flight() instead of direct manipulation on bs->in_flight] Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/copy-before-write.c | 22 +++++++++++++++++++++- qapi/block-core.json | 8 +++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/block/copy-before-write.c b/block/copy-before-write.c index fc13c7cd44..c24b8dd117 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -42,6 +42,7 @@ typedef struct BDRVCopyBeforeWriteState { BlockCopyState *bcs; BdrvChild *target; OnCbwError on_cbw_error; + uint32_t cbw_timeout_ns; /* * @lock: protects access to @access_bitmap, @done_bitmap and @@ -83,6 +84,13 @@ static coroutine_fn int cbw_co_preadv( return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } +static void block_copy_cb(void *opaque) +{ + BlockDriverState *bs = opaque; + + bdrv_dec_in_flight(bs); +} + /* * Do copy-before-write operation. * @@ -111,7 +119,16 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, off = QEMU_ALIGN_DOWN(offset, cluster_size); end = QEMU_ALIGN_UP(offset + bytes, cluster_size); - ret = block_copy(s->bcs, off, end - off, true, 0, NULL, NULL); + /* + * Increase in_flight, so that in case of timed-out block-copy, the + * remaining background block_copy() request (which can't be immediately + * cancelled by timeout) is presented in bs->in_flight. This way we are + * sure that on bs close() we'll previously wait for all timed-out but yet + * running block_copy calls. + */ + bdrv_inc_in_flight(bs); + ret = block_copy(s->bcs, off, end - off, true, s->cbw_timeout_ns, + block_copy_cb, bs); if (ret < 0 && s->on_cbw_error == ON_CBW_ERROR_BREAK_GUEST_WRITE) { return ret; } @@ -377,6 +394,7 @@ static BlockdevOptions *cbw_parse_options(QDict *options, Error **errp) */ qdict_extract_subqdict(options, NULL, "bitmap"); qdict_del(options, "on-cbw-error"); + qdict_del(options, "cbw-timeout"); out: visit_free(v); @@ -423,6 +441,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, } s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error : ON_CBW_ERROR_BREAK_GUEST_WRITE; + s->cbw_timeout_ns = opts->has_cbw_timeout ? + opts->cbw_timeout * NANOSECONDS_PER_SECOND : 0; bs->total_sectors = bs->file->bs->total_sectors; bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | diff --git a/qapi/block-core.json b/qapi/block-core.json index 3e8456a94a..2173e7734a 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -4227,12 +4227,18 @@ # @on-cbw-error: Behavior on failure of copy-before-write operation. # Default is @break-guest-write. (Since 7.1) # +# @cbw-timeout: Zero means no limit. Non-zero sets the timeout in seconds +# for copy-before-write operation. When a timeout occurs, +# the respective copy-before-write operation will fail, and +# the @on-cbw-error parameter will decide how this failure +# is handled. Default 0. (Since 7.1) +# # Since: 6.2 ## { 'struct': 'BlockdevOptionsCbw', 'base': 'BlockdevOptionsGenericFormat', 'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap', - '*on-cbw-error': 'OnCbwError' } } + '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } } ## # @BlockdevOptions: From 9d05a87b77a63ed5505c59f5e8e6c5ca4f2c04d3 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 7 Apr 2022 16:27:26 +0300 Subject: [PATCH 7/9] iotests: copy-before-write: add cases for cbw-timeout option Add two simple test-cases: timeout failure with break-snapshot-on-cbw-error behavior and similar with break-guest-write-on-cbw-error behavior. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Signed-off-by: Vladimir Sementsov-Ogievskiy --- tests/qemu-iotests/tests/copy-before-write | 81 +++++++++++++++++++ .../qemu-iotests/tests/copy-before-write.out | 4 +- 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/tests/copy-before-write b/tests/qemu-iotests/tests/copy-before-write index e6559de279..16efebbf8f 100755 --- a/tests/qemu-iotests/tests/copy-before-write +++ b/tests/qemu-iotests/tests/copy-before-write @@ -129,6 +129,87 @@ read 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) """) + def do_cbw_timeout(self, on_cbw_error): + result = self.vm.qmp('object-add', { + 'qom-type': 'throttle-group', + 'id': 'group0', + 'limits': {'bps-write': 300 * 1024} + }) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('blockdev-add', { + 'node-name': 'cbw', + 'driver': 'copy-before-write', + 'on-cbw-error': on_cbw_error, + 'cbw-timeout': 1, + 'file': { + 'driver': iotests.imgfmt, + 'file': { + 'driver': 'file', + 'filename': source_img, + } + }, + 'target': { + 'driver': 'throttle', + 'throttle-group': 'group0', + 'file': { + 'driver': 'qcow2', + 'file': { + 'driver': 'file', + 'filename': temp_img + } + } + } + }) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('blockdev-add', { + 'node-name': 'access', + 'driver': 'snapshot-access', + 'file': 'cbw' + }) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('human-monitor-command', + command_line='qemu-io cbw "write 0 512K"') + self.assert_qmp(result, 'return', '') + + # We need second write to trigger throttling + result = self.vm.qmp('human-monitor-command', + command_line='qemu-io cbw "write 512K 512K"') + self.assert_qmp(result, 'return', '') + + result = self.vm.qmp('human-monitor-command', + command_line='qemu-io access "read 0 1M"') + self.assert_qmp(result, 'return', '') + + self.vm.shutdown() + log = self.vm.get_log() + log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log) + log = re.sub(r'\[I \+\d+\.\d+\] CLOSED\n?$', '', log) + log = iotests.filter_qemu_io(log) + return log + + def test_timeout_break_guest(self): + log = self.do_cbw_timeout('break-guest-write') + self.assertEqual(log, """\ +wrote 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +write failed: Connection timed out +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +""") + + def test_timeout_break_snapshot(self): + log = self.do_cbw_timeout('break-snapshot') + self.assertEqual(log, """\ +wrote 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read failed: Permission denied +""") + if __name__ == '__main__': iotests.main(supported_fmts=['qcow2'], diff --git a/tests/qemu-iotests/tests/copy-before-write.out b/tests/qemu-iotests/tests/copy-before-write.out index fbc63e62f8..89968f35d7 100644 --- a/tests/qemu-iotests/tests/copy-before-write.out +++ b/tests/qemu-iotests/tests/copy-before-write.out @@ -1,5 +1,5 @@ -.. +.... ---------------------------------------------------------------------- -Ran 2 tests +Ran 4 tests OK From 8bb100c9e2dc1fe0e33283b0c43252dbaf4eb71b Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 30 May 2022 12:39:29 +0200 Subject: [PATCH 8/9] nbd: trace long NBD operations At the moment there are 2 sources of lengthy operations if configured: * open connection, which could retry inside and * reconnect of already opened connection These operations could be quite lengthy and cumbersome to catch thus it would be quite natural to add trace points for them. This patch is based on the original downstream work made by Vladimir. Signed-off-by: Denis V. Lunev CC: Eric Blake CC: Vladimir Sementsov-Ogievskiy CC: Kevin Wolf CC: Hanna Reitz CC: Paolo Bonzini Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/nbd.c | 6 +++++- block/trace-events | 2 ++ nbd/client-connection.c | 2 ++ nbd/trace-events | 3 +++ 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/block/nbd.c b/block/nbd.c index 7f5f50ec46..5849d312cc 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -371,6 +371,7 @@ static bool nbd_client_connecting(BDRVNBDState *s) /* Called with s->requests_lock taken. */ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) { + int ret; bool blocking = s->state == NBD_CLIENT_CONNECTING_WAIT; /* @@ -380,6 +381,8 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) assert(nbd_client_connecting(s)); assert(s->in_flight == 1); + trace_nbd_reconnect_attempt(s->bs->in_flight); + if (blocking && !s->reconnect_delay_timer) { /* * It's the first reconnect attempt after switching to @@ -401,7 +404,8 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) } qemu_mutex_unlock(&s->requests_lock); - nbd_co_do_establish_connection(s->bs, blocking, NULL); + ret = nbd_co_do_establish_connection(s->bs, blocking, NULL); + trace_nbd_reconnect_attempt_result(ret, s->bs->in_flight); qemu_mutex_lock(&s->requests_lock); /* diff --git a/block/trace-events b/block/trace-events index 549090d453..48dbf10c66 100644 --- a/block/trace-events +++ b/block/trace-events @@ -172,6 +172,8 @@ nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s" nbd_co_request_fail(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s" nbd_client_handshake(const char *export_name) "export '%s'" nbd_client_handshake_success(const char *export_name) "export '%s'" +nbd_reconnect_attempt(unsigned in_flight) "in_flight %u" +nbd_reconnect_attempt_result(int ret, unsigned in_flight) "ret %d in_flight %u" # ssh.c ssh_restart_coroutine(void *co) "co=%p" diff --git a/nbd/client-connection.c b/nbd/client-connection.c index 2a632931c3..0c5f917efa 100644 --- a/nbd/client-connection.c +++ b/nbd/client-connection.c @@ -23,6 +23,7 @@ */ #include "qemu/osdep.h" +#include "trace.h" #include "block/nbd.h" @@ -210,6 +211,7 @@ static void *connect_thread_func(void *opaque) object_unref(OBJECT(conn->sioc)); conn->sioc = NULL; if (conn->do_retry && !conn->detached) { + trace_nbd_connect_thread_sleep(timeout); qemu_mutex_unlock(&conn->mutex); sleep(timeout); diff --git a/nbd/trace-events b/nbd/trace-events index c4919a2dd5..b7032ca277 100644 --- a/nbd/trace-events +++ b/nbd/trace-events @@ -73,3 +73,6 @@ nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char *n nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) "Payload received: handle = %" PRIu64 ", len = %" PRIu32 nbd_co_receive_align_compliance(const char *op, uint64_t from, uint32_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx32 ", align=0x%" PRIx32 nbd_trip(void) "Reading request" + +# client-connection.c +nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64 From 1b8f777673985af366de099ad4e41d334b36fb12 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 30 May 2022 12:39:57 +0200 Subject: [PATCH 9/9] block: use 'unsigned' for in_flight field on driver state This patch makes in_flight field 'unsigned' for BDRVNBDState and MirrorBlockJob. This matches the definition of this field on BDS and is generically correct - we should never get negative value here. Signed-off-by: Denis V. Lunev CC: John Snow CC: Vladimir Sementsov-Ogievskiy CC: Kevin Wolf CC: Hanna Reitz CC: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/mirror.c | 2 +- block/nbd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index d8ecb9efa2..3c4ab1159d 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -73,7 +73,7 @@ typedef struct MirrorBlockJob { uint64_t last_pause_ns; unsigned long *in_flight_bitmap; - int in_flight; + unsigned in_flight; int64_t bytes_in_flight; QTAILQ_HEAD(, MirrorOp) ops_in_flight; int ret; diff --git a/block/nbd.c b/block/nbd.c index 5849d312cc..97683cce27 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -77,7 +77,7 @@ typedef struct BDRVNBDState { QemuMutex requests_lock; NBDClientState state; CoQueue free_sema; - int in_flight; + unsigned in_flight; NBDClientRequest requests[MAX_NBD_REQUESTS]; QEMUTimer *reconnect_delay_timer;