From be16b8bf9f231d19c172961d3327f9f72a7cd45f Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 6 Sep 2021 22:06:48 +0300 Subject: [PATCH 1/7] nbd: allow reconnect on open, with corresponding new options It is useful when start of vm and start of nbd server are not simple to sync. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake --- block/nbd.c | 45 +++++++++++++++++++++++++++++++++++++++++++- qapi/block-core.json | 9 ++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 5ef462db1b..63dbfa807d 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -80,6 +80,7 @@ typedef struct BDRVNBDState { NBDClientState state; QEMUTimer *reconnect_delay_timer; + QEMUTimer *open_timer; NBDClientRequest requests[MAX_NBD_REQUESTS]; NBDReply reply; @@ -87,6 +88,7 @@ typedef struct BDRVNBDState { /* Connection parameters */ uint32_t reconnect_delay; + uint32_t open_timeout; SocketAddress *saddr; char *export, *tlscredsid; QCryptoTLSCreds *tlscreds; @@ -218,6 +220,32 @@ static void nbd_teardown_connection(BlockDriverState *bs) s->state = NBD_CLIENT_QUIT; } +static void open_timer_del(BDRVNBDState *s) +{ + if (s->open_timer) { + timer_free(s->open_timer); + s->open_timer = NULL; + } +} + +static void open_timer_cb(void *opaque) +{ + BDRVNBDState *s = opaque; + + nbd_co_establish_connection_cancel(s->conn); + open_timer_del(s); +} + +static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns) +{ + assert(!s->open_timer); + s->open_timer = aio_timer_new(bdrv_get_aio_context(s->bs), + QEMU_CLOCK_REALTIME, + SCALE_NS, + open_timer_cb, s); + timer_mod(s->open_timer, expire_time_ns); +} + static bool nbd_client_connecting(BDRVNBDState *s) { NBDClientState state = qatomic_load_acquire(&s->state); @@ -1742,6 +1770,15 @@ static QemuOptsList nbd_runtime_opts = { "future requests before a successful reconnect will " "immediately fail. Default 0", }, + { + .name = "open-timeout", + .type = QEMU_OPT_NUMBER, + .help = "In seconds. If zero, the nbd driver tries the connection " + "only once, and fails to open if the connection fails. " + "If non-zero, the nbd driver will repeat connection " + "attempts until successful or until @open-timeout seconds " + "have elapsed. Default 0", + }, { /* end of list */ } }, }; @@ -1797,6 +1834,7 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options, } s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0); + s->open_timeout = qemu_opt_get_number(opts, "open-timeout", 0); ret = 0; @@ -1828,7 +1866,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, s->conn = nbd_client_connection_new(s->saddr, true, s->export, s->x_dirty_bitmap, s->tlscreds); - /* TODO: Configurable retry-until-timeout behaviour. */ + if (s->open_timeout) { + nbd_client_connection_enable_retry(s->conn); + open_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + + s->open_timeout * NANOSECONDS_PER_SECOND); + } + s->state = NBD_CLIENT_CONNECTING_WAIT; ret = nbd_do_establish_connection(bs, errp); if (ret < 0) { diff --git a/qapi/block-core.json b/qapi/block-core.json index 1d3dd9cb48..bd0b285245 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -4096,6 +4096,12 @@ # future requests before a successful reconnect will # immediately fail. Default 0 (Since 4.2) # +# @open-timeout: In seconds. If zero, the nbd driver tries the connection +# only once, and fails to open if the connection fails. +# If non-zero, the nbd driver will repeat connection attempts +# until successful or until @open-timeout seconds have elapsed. +# Default 0 (Since 7.0) +# # Features: # @unstable: Member @x-dirty-bitmap is experimental. # @@ -4106,7 +4112,8 @@ '*export': 'str', '*tls-creds': 'str', '*x-dirty-bitmap': { 'type': 'str', 'features': [ 'unstable' ] }, - '*reconnect-delay': 'uint32' } } + '*reconnect-delay': 'uint32', + '*open-timeout': 'uint32' } } ## # @BlockdevOptionsRaw: From 169b9a94ed24a40b2bfc8664ecd2e75cd33d0978 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 6 Sep 2021 22:06:49 +0300 Subject: [PATCH 2/7] nbd/client-connection: nbd_co_establish_connection(): return real error The only caller of nbd_do_establish_connection() that uses errp is nbd_open(). The only way to cancel this call is through open_timer timeout. And for this case, user will be more interested in description of last failed connect rather than in "Connection attempt cancelled by other operation". So, let's change behavior on cancel to return previous failure error if available. Do the same for non-blocking failure case. In this case we still don't have a caller that is interested in errp. But let's be consistent. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake --- nbd/client-connection.c | 52 +++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/nbd/client-connection.c b/nbd/client-connection.c index 695f855754..722998c985 100644 --- a/nbd/client-connection.c +++ b/nbd/client-connection.c @@ -39,16 +39,18 @@ struct NBDClientConnection { QemuMutex mutex; - /* - * @sioc and @err represent a connection attempt. While running - * is true, they are only used by the connection thread, and mutex - * locking is not needed. Once the thread finishes, - * nbd_co_establish_connection then steals these pointers while - * under the mutex. - */ NBDExportInfo updated_info; + /* + * @sioc represents a successful result. While thread is running, @sioc is + * used only by thread and not protected by mutex. When thread is not + * running, @sioc is stolen by nbd_co_establish_connection() under mutex. + */ QIOChannelSocket *sioc; QIOChannel *ioc; + /* + * @err represents previous attempt. It may be copied by + * nbd_co_establish_connection() when it reports failure. + */ Error *err; /* All further fields are accessed only under mutex */ @@ -170,18 +172,18 @@ static void *connect_thread_func(void *opaque) qemu_mutex_lock(&conn->mutex); while (!conn->detached) { + Error *local_err = NULL; + assert(!conn->sioc); conn->sioc = qio_channel_socket_new(); qemu_mutex_unlock(&conn->mutex); - error_free(conn->err); - conn->err = NULL; conn->updated_info = conn->initial_info; ret = nbd_connect(conn->sioc, conn->saddr, conn->do_negotiation ? &conn->updated_info : NULL, - conn->tlscreds, &conn->ioc, &conn->err); + conn->tlscreds, &conn->ioc, &local_err); /* * conn->updated_info will finally be returned to the user. Clear the @@ -194,6 +196,10 @@ static void *connect_thread_func(void *opaque) qemu_mutex_lock(&conn->mutex); + error_free(conn->err); + conn->err = NULL; + error_propagate(&conn->err, local_err); + if (ret < 0) { object_unref(OBJECT(conn->sioc)); conn->sioc = NULL; @@ -311,14 +317,17 @@ nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info, } conn->running = true; - error_free(conn->err); - conn->err = NULL; qemu_thread_create(&thread, "nbd-connect", connect_thread_func, conn, QEMU_THREAD_DETACHED); } if (!blocking) { - error_setg(errp, "No connection at the moment"); + if (conn->err) { + error_propagate(errp, error_copy(conn->err)); + } else { + error_setg(errp, "No connection at the moment"); + } + return NULL; } @@ -339,14 +348,23 @@ nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info, * attempt as failed, but leave the connection thread running, * to reuse it for the next connection attempt. */ - error_setg(errp, "Connection attempt cancelled by other operation"); + if (conn->err) { + error_propagate(errp, error_copy(conn->err)); + } else { + error_setg(errp, + "Connection attempt cancelled by other operation"); + } + return NULL; } else { - error_propagate(errp, conn->err); - conn->err = NULL; - if (!conn->sioc) { + /* Thread finished. There must be either error or sioc */ + assert(!conn->err != !conn->sioc); + + if (conn->err) { + error_propagate(errp, error_copy(conn->err)); return NULL; } + if (conn->do_negotiation) { memcpy(info, &conn->updated_info, sizeof(*info)); if (conn->ioc) { From 9e14491af473e94ea836c70c8a57da68d02e62b8 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 6 Sep 2021 22:06:50 +0300 Subject: [PATCH 3/7] nbd/client-connection: improve error message of cancelled attempt Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake --- nbd/client-connection.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/nbd/client-connection.c b/nbd/client-connection.c index 722998c985..2bda42641d 100644 --- a/nbd/client-connection.c +++ b/nbd/client-connection.c @@ -351,8 +351,15 @@ nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info, if (conn->err) { error_propagate(errp, error_copy(conn->err)); } else { - error_setg(errp, - "Connection attempt cancelled by other operation"); + /* + * The only possible case here is cancelling by open_timer + * during nbd_open(). So, the error message is for that case. + * If we have more use cases, we can refactor + * nbd_co_establish_connection_cancel() to take an additional + * parameter cancel_reason, that would be passed than to the + * caller of cancelled nbd_co_establish_connection(). + */ + error_setg(errp, "Connection attempt cancelled by timeout"); } return NULL; From c34ec5137d67bab194d5c20e1c85e0e5d392af8c Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 6 Sep 2021 22:06:51 +0300 Subject: [PATCH 4/7] iotests.py: add qemu_tool_popen() Split qemu_tool_popen() from qemu_tool_pipe_and_status() to be used separately. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Nikita Lapshin --- tests/qemu-iotests/iotests.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 83bfedb902..452d047716 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -138,14 +138,22 @@ def unarchive_sample_image(sample, fname): shutil.copyfileobj(f_in, f_out) +def qemu_tool_popen(args: Sequence[str], + connect_stderr: bool = True) -> 'subprocess.Popen[str]': + stderr = subprocess.STDOUT if connect_stderr else None + # pylint: disable=consider-using-with + return subprocess.Popen(args, + stdout=subprocess.PIPE, + stderr=stderr, + universal_newlines=True) + + def qemu_tool_pipe_and_status(tool: str, args: Sequence[str], connect_stderr: bool = True) -> Tuple[str, int]: """ Run a tool and return both its output and its exit code """ - stderr = subprocess.STDOUT if connect_stderr else None - with subprocess.Popen(args, stdout=subprocess.PIPE, - stderr=stderr, universal_newlines=True) as subp: + with qemu_tool_popen(args, connect_stderr) as subp: output = subp.communicate()[0] if subp.returncode < 0: cmd = ' '.join(args) From 94a781f220141ad9f91a1c1583368cd5f24c4d7e Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 14 Oct 2021 19:01:14 +0200 Subject: [PATCH 5/7] iotests.py: add and use qemu_io_wrap_args() For qemu_io* functions support --image-opts argument, which conflicts with -f argument from qemu_io_args. For QemuIoInteractive use new wrapper as well, which allows relying on default format. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Nikita Lapshin --- tests/qemu-iotests/iotests.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 452d047716..c4b81a291b 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -241,10 +241,15 @@ def img_info_log(filename, filter_path=None, imgopts=False, extra_args=()): filter_path = filename log(filter_img_info(output, filter_path)) +def qemu_io_wrap_args(args: Sequence[str]) -> List[str]: + if '-f' in args or '--image-opts' in args: + return qemu_io_args_no_fmt + list(args) + else: + return qemu_io_args + list(args) + def qemu_io(*args): '''Run qemu-io and return the stdout data''' - args = qemu_io_args + list(args) - return qemu_tool_pipe_and_status('qemu-io', args)[0] + return qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args))[0] def qemu_io_log(*args): result = qemu_io(*args) @@ -253,12 +258,7 @@ def qemu_io_log(*args): def qemu_io_silent(*args): '''Run qemu-io and return the exit code, suppressing stdout''' - if '-f' in args or '--image-opts' in args: - default_args = qemu_io_args_no_fmt - else: - default_args = qemu_io_args - - args = default_args + list(args) + args = qemu_io_wrap_args(args) result = subprocess.run(args, stdout=subprocess.DEVNULL, check=False) if result.returncode < 0: sys.stderr.write('qemu-io received signal %i: %s\n' % @@ -267,14 +267,14 @@ def qemu_io_silent(*args): def qemu_io_silent_check(*args): '''Run qemu-io and return the true if subprocess returned 0''' - args = qemu_io_args + list(args) + args = qemu_io_wrap_args(args) result = subprocess.run(args, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, check=False) return result.returncode == 0 class QemuIoInteractive: def __init__(self, *args): - self.args = qemu_io_args_no_fmt + list(args) + self.args = qemu_io_wrap_args(args) # We need to keep the Popen objext around, and not # close it immediately. Therefore, disable the pylint check: # pylint: disable=consider-using-with From 75c90eeeaf142f70c85ee6b5839abdd7cb1486f5 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 14 Oct 2021 19:02:06 +0200 Subject: [PATCH 6/7] iotests.py: add qemu_io_popen() Add qemu-io Popen constructor wrapper. To be used in the following new test commit. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Nikita Lapshin --- tests/qemu-iotests/iotests.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index c4b81a291b..1e2f2391d1 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -247,6 +247,9 @@ def qemu_io_wrap_args(args: Sequence[str]) -> List[str]: else: return qemu_io_args + list(args) +def qemu_io_popen(*args): + return qemu_tool_popen(qemu_io_wrap_args(args)) + def qemu_io(*args): '''Run qemu-io and return the stdout data''' return qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args))[0] From ab7f7e67a7e7b49964109501dfcde4ec29bae60e Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 6 Sep 2021 22:06:54 +0300 Subject: [PATCH 7/7] iotests: add nbd-reconnect-on-open test Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Nikita Lapshin --- .../qemu-iotests/tests/nbd-reconnect-on-open | 71 +++++++++++++++++++ .../tests/nbd-reconnect-on-open.out | 11 +++ 2 files changed, 82 insertions(+) create mode 100755 tests/qemu-iotests/tests/nbd-reconnect-on-open create mode 100644 tests/qemu-iotests/tests/nbd-reconnect-on-open.out diff --git a/tests/qemu-iotests/tests/nbd-reconnect-on-open b/tests/qemu-iotests/tests/nbd-reconnect-on-open new file mode 100755 index 0000000000..8be721a24f --- /dev/null +++ b/tests/qemu-iotests/tests/nbd-reconnect-on-open @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +# +# Test nbd reconnect on open +# +# Copyright (c) 2020 Virtuozzo International GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import time + +import iotests +from iotests import qemu_img_create, file_path, qemu_io_popen, qemu_nbd, \ + qemu_io_log, log + +iotests.script_initialize(supported_fmts=['qcow2']) + +disk, nbd_sock = file_path('disk', 'nbd-sock') + + +def create_args(open_timeout): + return ['--image-opts', '-c', 'read 0 1M', + f'driver=nbd,open-timeout={open_timeout},' + f'server.type=unix,server.path={nbd_sock}'] + + +def check_fail_to_connect(open_timeout): + log(f'Check fail to connect with {open_timeout} seconds of timeout') + + start_t = time.time() + qemu_io_log(*create_args(open_timeout)) + delta_t = time.time() - start_t + + max_delta = open_timeout + 0.2 + if open_timeout <= delta_t <= max_delta: + log(f'qemu_io finished in {open_timeout}..{max_delta} seconds, OK') + else: + note = 'too early' if delta_t < open_timeout else 'too long' + log(f'qemu_io finished in {delta_t:.1f} seconds, {note}') + + +qemu_img_create('-f', iotests.imgfmt, disk, '1M') + +# Start NBD client when NBD server is not yet running. It should not fail, but +# wait for 5 seconds for the server to be available. +client = qemu_io_popen(*create_args(5)) + +time.sleep(1) +qemu_nbd('-k', nbd_sock, '-f', iotests.imgfmt, disk) + +# client should succeed +log(client.communicate()[0], filters=[iotests.filter_qemu_io]) + +# Server was started without --persistent flag, so it should be off now. Let's +# check it and at the same time check that with open-timeout=0 client fails +# immediately. +check_fail_to_connect(0) + +# Check that we will fail after non-zero timeout if server is still unavailable +check_fail_to_connect(1) diff --git a/tests/qemu-iotests/tests/nbd-reconnect-on-open.out b/tests/qemu-iotests/tests/nbd-reconnect-on-open.out new file mode 100644 index 0000000000..a35ae30ea4 --- /dev/null +++ b/tests/qemu-iotests/tests/nbd-reconnect-on-open.out @@ -0,0 +1,11 @@ +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +Check fail to connect with 0 seconds of timeout +qemu-io: can't open: Failed to connect to 'TEST_DIR/PID-nbd-sock': No such file or directory + +qemu_io finished in 0..0.2 seconds, OK +Check fail to connect with 1 seconds of timeout +qemu-io: can't open: Failed to connect to 'TEST_DIR/PID-nbd-sock': No such file or directory + +qemu_io finished in 1..1.2 seconds, OK