mirror of https://github.com/xemu-project/xemu.git
nbd: reconnect-on-open feature
v2: simple fix for mypy and pylint complains on patch 04 -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEi5wmzbL9FHyIDoahVh8kwfGfefsFAmHENyAACgkQVh8kwfGf efvnTRAAojAbPiKfCXiGn0JhhyjWSGDIbVpTL3t2l1Sc77PHobwbnXi+2PiG53gU qHif2C/vrN7+tfVazIhUn033MRu7yF0Ce+bdNnCLhc+KWpH7KhM3s+HG6srGprus C+PgCc49fbSDx2UEBMxwmmsTmwlYH14HsnKEERyBZ90wbWbO4OKL5SIgXBc8Qc+7 jCAnvEAF0welb3qQblnfEs1rlzIbHg/PyJYDw967dsrdBdPieD0jryW0nnQdol+n uigUZUVraAl1muBffKm7KY6f/2V7dnL/++OTZ2hICe24ICBtWrta8xPc84FBgvos DksBQzDoJBLR22X/U715zUlW2AI5M5zfM1DDXwVHsk6iS6DrvlLtul3gTXXoaQUG E+S3Fyc5dLR70rc5PSHtGsQ1/506fXNXldH0Pt4k3IUl/vXuIp8JkTzNY3g5WGND l8m6wLqOd8VeNsVQ01sSLVOfcqsPypSThp4XFZFYXk93TDyuZThDvEkdf3NkSahN oYI6vL0QM3HGR1QybxPFvr5ZeStscO6wXZlTGzg4FxkRQOaZ1ieBMaPvFbDgFopw jSj2+GrVeqPaHvtmKOG1VMMe2+9Zw+Dn8R3z0IisCSo5sqF5kNvRgaBTxVQ1x4HP 6EF42WCZVEn3B4adH8GXMavYkbDLtvK+Lp+aGP65WwMfYwk69WI= =YH3P -----END PGP SIGNATURE----- Merge tag 'pull-nbd-2021-12-22-v2' of https://src.openvz.org/scm/~vsementsov/qemu into staging nbd: reconnect-on-open feature v2: simple fix for mypy and pylint complains on patch 04 # gpg: Signature made Thu 23 Dec 2021 12:45:20 AM PST # gpg: using RSA key 8B9C26CDB2FD147C880E86A1561F24C1F19F79FB # gpg: Good signature from "Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 8B9C 26CD B2FD 147C 880E 86A1 561F 24C1 F19F 79FB * tag 'pull-nbd-2021-12-22-v2' of https://src.openvz.org/scm/~vsementsov/qemu: iotests: add nbd-reconnect-on-open test iotests.py: add qemu_io_popen() iotests.py: add and use qemu_io_wrap_args() iotests.py: add qemu_tool_popen() nbd/client-connection: improve error message of cancelled attempt nbd/client-connection: nbd_co_establish_connection(): return real error nbd: allow reconnect on open, with corresponding new options Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
commit
1bd88c4542
45
block/nbd.c
45
block/nbd.c
|
@ -80,6 +80,7 @@ typedef struct BDRVNBDState {
|
|||
NBDClientState state;
|
||||
|
||||
QEMUTimer *reconnect_delay_timer;
|
||||
QEMUTimer *open_timer;
|
||||
|
||||
NBDClientRequest requests[MAX_NBD_REQUESTS];
|
||||
NBDReply reply;
|
||||
|
@ -87,6 +88,7 @@ typedef struct BDRVNBDState {
|
|||
|
||||
/* Connection parameters */
|
||||
uint32_t reconnect_delay;
|
||||
uint32_t open_timeout;
|
||||
SocketAddress *saddr;
|
||||
char *export, *tlscredsid;
|
||||
QCryptoTLSCreds *tlscreds;
|
||||
|
@ -218,6 +220,32 @@ static void nbd_teardown_connection(BlockDriverState *bs)
|
|||
s->state = NBD_CLIENT_QUIT;
|
||||
}
|
||||
|
||||
/*
 * Release the open timer, if one exists, and reset s->open_timer to NULL.
 * Calling this when no timer is set is a no-op.
 */
static void open_timer_del(BDRVNBDState *s)
|
||||
{
|
||||
if (s->open_timer) {
|
||||
timer_free(s->open_timer);
|
||||
s->open_timer = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Open-timer callback. @opaque is the BDRVNBDState registered by
 * open_timer_init(). Cancels the connection attempt running on s->conn
 * and then frees the timer.
 */
static void open_timer_cb(void *opaque)
|
||||
{
|
||||
BDRVNBDState *s = opaque;
|
||||
|
||||
nbd_co_establish_connection_cancel(s->conn);
|
||||
open_timer_del(s);
|
||||
}
|
||||
|
||||
/*
 * Create the open timer and arm it to fire open_timer_cb() at
 * @expire_time_ns.
 *
 * The timer is created in the AioContext of s->bs on QEMU_CLOCK_REALTIME
 * with nanosecond scale, so @expire_time_ns is an absolute time on that
 * clock (the caller in nbd_open() computes it as "now + open_timeout").
 * No open timer may already exist when this is called.
 */
static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns)
|
||||
{
|
||||
assert(!s->open_timer);
|
||||
s->open_timer = aio_timer_new(bdrv_get_aio_context(s->bs),
|
||||
QEMU_CLOCK_REALTIME,
|
||||
SCALE_NS,
|
||||
open_timer_cb, s);
|
||||
timer_mod(s->open_timer, expire_time_ns);
|
||||
}
|
||||
|
||||
static bool nbd_client_connecting(BDRVNBDState *s)
|
||||
{
|
||||
NBDClientState state = qatomic_load_acquire(&s->state);
|
||||
|
@ -1742,6 +1770,15 @@ static QemuOptsList nbd_runtime_opts = {
|
|||
"future requests before a successful reconnect will "
|
||||
"immediately fail. Default 0",
|
||||
},
|
||||
{
|
||||
.name = "open-timeout",
|
||||
.type = QEMU_OPT_NUMBER,
|
||||
.help = "In seconds. If zero, the nbd driver tries the connection "
|
||||
"only once, and fails to open if the connection fails. "
|
||||
"If non-zero, the nbd driver will repeat connection "
|
||||
"attempts until successful or until @open-timeout seconds "
|
||||
"have elapsed. Default 0",
|
||||
},
|
||||
{ /* end of list */ }
|
||||
},
|
||||
};
|
||||
|
@ -1797,6 +1834,7 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
|
|||
}
|
||||
|
||||
s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
|
||||
s->open_timeout = qemu_opt_get_number(opts, "open-timeout", 0);
|
||||
|
||||
ret = 0;
|
||||
|
||||
|
@ -1828,7 +1866,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
|
|||
s->conn = nbd_client_connection_new(s->saddr, true, s->export,
|
||||
s->x_dirty_bitmap, s->tlscreds);
|
||||
|
||||
/* TODO: Configurable retry-until-timeout behaviour. */
|
||||
if (s->open_timeout) {
|
||||
nbd_client_connection_enable_retry(s->conn);
|
||||
open_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
|
||||
s->open_timeout * NANOSECONDS_PER_SECOND);
|
||||
}
|
||||
|
||||
s->state = NBD_CLIENT_CONNECTING_WAIT;
|
||||
ret = nbd_do_establish_connection(bs, errp);
|
||||
if (ret < 0) {
|
||||
|
|
|
@ -39,16 +39,18 @@ struct NBDClientConnection {
|
|||
|
||||
QemuMutex mutex;
|
||||
|
||||
/*
|
||||
* @sioc and @err represent a connection attempt. While running
|
||||
* is true, they are only used by the connection thread, and mutex
|
||||
* locking is not needed. Once the thread finishes,
|
||||
* nbd_co_establish_connection then steals these pointers while
|
||||
* under the mutex.
|
||||
*/
|
||||
NBDExportInfo updated_info;
|
||||
/*
|
||||
* @sioc represents a successful result. While thread is running, @sioc is
|
||||
* used only by thread and not protected by mutex. When thread is not
|
||||
* running, @sioc is stolen by nbd_co_establish_connection() under mutex.
|
||||
*/
|
||||
QIOChannelSocket *sioc;
|
||||
QIOChannel *ioc;
|
||||
/*
|
||||
* @err represents the previous attempt. It may be copied by
|
||||
* nbd_co_establish_connection() when it reports failure.
|
||||
*/
|
||||
Error *err;
|
||||
|
||||
/* All further fields are accessed only under mutex */
|
||||
|
@ -170,18 +172,18 @@ static void *connect_thread_func(void *opaque)
|
|||
|
||||
qemu_mutex_lock(&conn->mutex);
|
||||
while (!conn->detached) {
|
||||
Error *local_err = NULL;
|
||||
|
||||
assert(!conn->sioc);
|
||||
conn->sioc = qio_channel_socket_new();
|
||||
|
||||
qemu_mutex_unlock(&conn->mutex);
|
||||
|
||||
error_free(conn->err);
|
||||
conn->err = NULL;
|
||||
conn->updated_info = conn->initial_info;
|
||||
|
||||
ret = nbd_connect(conn->sioc, conn->saddr,
|
||||
conn->do_negotiation ? &conn->updated_info : NULL,
|
||||
conn->tlscreds, &conn->ioc, &conn->err);
|
||||
conn->tlscreds, &conn->ioc, &local_err);
|
||||
|
||||
/*
|
||||
* conn->updated_info will finally be returned to the user. Clear the
|
||||
|
@ -194,6 +196,10 @@ static void *connect_thread_func(void *opaque)
|
|||
|
||||
qemu_mutex_lock(&conn->mutex);
|
||||
|
||||
error_free(conn->err);
|
||||
conn->err = NULL;
|
||||
error_propagate(&conn->err, local_err);
|
||||
|
||||
if (ret < 0) {
|
||||
object_unref(OBJECT(conn->sioc));
|
||||
conn->sioc = NULL;
|
||||
|
@ -311,14 +317,17 @@ nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info,
|
|||
}
|
||||
|
||||
conn->running = true;
|
||||
error_free(conn->err);
|
||||
conn->err = NULL;
|
||||
qemu_thread_create(&thread, "nbd-connect",
|
||||
connect_thread_func, conn, QEMU_THREAD_DETACHED);
|
||||
}
|
||||
|
||||
if (!blocking) {
|
||||
error_setg(errp, "No connection at the moment");
|
||||
if (conn->err) {
|
||||
error_propagate(errp, error_copy(conn->err));
|
||||
} else {
|
||||
error_setg(errp, "No connection at the moment");
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -339,14 +348,30 @@ nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info,
|
|||
* attempt as failed, but leave the connection thread running,
|
||||
* to reuse it for the next connection attempt.
|
||||
*/
|
||||
error_setg(errp, "Connection attempt cancelled by other operation");
|
||||
if (conn->err) {
|
||||
error_propagate(errp, error_copy(conn->err));
|
||||
} else {
|
||||
/*
|
||||
* The only possible case here is cancelling by open_timer
|
||||
* during nbd_open(). So, the error message is for that case.
|
||||
* If we have more use cases, we can refactor
|
||||
* nbd_co_establish_connection_cancel() to take an additional
|
||||
* parameter cancel_reason, that would then be passed to the
|
||||
* caller of cancelled nbd_co_establish_connection().
|
||||
*/
|
||||
error_setg(errp, "Connection attempt cancelled by timeout");
|
||||
}
|
||||
|
||||
return NULL;
|
||||
} else {
|
||||
error_propagate(errp, conn->err);
|
||||
conn->err = NULL;
|
||||
if (!conn->sioc) {
|
||||
/* Thread finished. There must be either error or sioc */
|
||||
assert(!conn->err != !conn->sioc);
|
||||
|
||||
if (conn->err) {
|
||||
error_propagate(errp, error_copy(conn->err));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (conn->do_negotiation) {
|
||||
memcpy(info, &conn->updated_info, sizeof(*info));
|
||||
if (conn->ioc) {
|
||||
|
|
|
@ -4096,6 +4096,12 @@
|
|||
# future requests before a successful reconnect will
|
||||
# immediately fail. Default 0 (Since 4.2)
|
||||
#
|
||||
# @open-timeout: In seconds. If zero, the nbd driver tries the connection
|
||||
# only once, and fails to open if the connection fails.
|
||||
# If non-zero, the nbd driver will repeat connection attempts
|
||||
# until successful or until @open-timeout seconds have elapsed.
|
||||
# Default 0 (Since 7.0)
|
||||
#
|
||||
# Features:
|
||||
# @unstable: Member @x-dirty-bitmap is experimental.
|
||||
#
|
||||
|
@ -4106,7 +4112,8 @@
|
|||
'*export': 'str',
|
||||
'*tls-creds': 'str',
|
||||
'*x-dirty-bitmap': { 'type': 'str', 'features': [ 'unstable' ] },
|
||||
'*reconnect-delay': 'uint32' } }
|
||||
'*reconnect-delay': 'uint32',
|
||||
'*open-timeout': 'uint32' } }
|
||||
|
||||
##
|
||||
# @BlockdevOptionsRaw:
|
||||
|
|
|
@ -138,14 +138,22 @@ def unarchive_sample_image(sample, fname):
|
|||
shutil.copyfileobj(f_in, f_out)
|
||||
|
||||
|
||||
# Start the command given by `args` and return the Popen object without
# waiting for it to finish. stdout is captured through a pipe in text mode
# (universal_newlines=True). When `connect_stderr` is true, stderr is merged
# into stdout; otherwise stderr is not redirected.
def qemu_tool_popen(args: Sequence[str],
|
||||
connect_stderr: bool = True) -> 'subprocess.Popen[str]':
|
||||
stderr = subprocess.STDOUT if connect_stderr else None
|
||||
# pylint: disable=consider-using-with
|
||||
return subprocess.Popen(args,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=stderr,
|
||||
universal_newlines=True)
|
||||
|
||||
|
||||
def qemu_tool_pipe_and_status(tool: str, args: Sequence[str],
|
||||
connect_stderr: bool = True) -> Tuple[str, int]:
|
||||
"""
|
||||
Run a tool and return both its output and its exit code
|
||||
"""
|
||||
stderr = subprocess.STDOUT if connect_stderr else None
|
||||
with subprocess.Popen(args, stdout=subprocess.PIPE,
|
||||
stderr=stderr, universal_newlines=True) as subp:
|
||||
with qemu_tool_popen(args, connect_stderr) as subp:
|
||||
output = subp.communicate()[0]
|
||||
if subp.returncode < 0:
|
||||
cmd = ' '.join(args)
|
||||
|
@ -233,10 +241,18 @@ def img_info_log(filename, filter_path=None, imgopts=False, extra_args=()):
|
|||
filter_path = filename
|
||||
log(filter_img_info(output, filter_path))
|
||||
|
||||
# Build the full qemu-io command line for `args`. If the caller already
# selects a format ('-f') or passes '--image-opts', prepend
# qemu_io_args_no_fmt; otherwise prepend qemu_io_args. Always returns a
# new list.
def qemu_io_wrap_args(args: Sequence[str]) -> List[str]:
|
||||
if '-f' in args or '--image-opts' in args:
|
||||
return qemu_io_args_no_fmt + list(args)
|
||||
else:
|
||||
return qemu_io_args + list(args)
|
||||
|
||||
# Start qemu-io with `args` (wrapped by qemu_io_wrap_args) and return the
# running Popen object from qemu_tool_popen, without waiting for it to exit.
def qemu_io_popen(*args):
|
||||
return qemu_tool_popen(qemu_io_wrap_args(args))
|
||||
|
||||
def qemu_io(*args):
|
||||
'''Run qemu-io and return the stdout data'''
|
||||
args = qemu_io_args + list(args)
|
||||
return qemu_tool_pipe_and_status('qemu-io', args)[0]
|
||||
return qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args))[0]
|
||||
|
||||
def qemu_io_log(*args):
|
||||
result = qemu_io(*args)
|
||||
|
@ -245,12 +261,7 @@ def qemu_io_log(*args):
|
|||
|
||||
def qemu_io_silent(*args):
|
||||
'''Run qemu-io and return the exit code, suppressing stdout'''
|
||||
if '-f' in args or '--image-opts' in args:
|
||||
default_args = qemu_io_args_no_fmt
|
||||
else:
|
||||
default_args = qemu_io_args
|
||||
|
||||
args = default_args + list(args)
|
||||
args = qemu_io_wrap_args(args)
|
||||
result = subprocess.run(args, stdout=subprocess.DEVNULL, check=False)
|
||||
if result.returncode < 0:
|
||||
sys.stderr.write('qemu-io received signal %i: %s\n' %
|
||||
|
@ -259,14 +270,14 @@ def qemu_io_silent(*args):
|
|||
|
||||
def qemu_io_silent_check(*args):
|
||||
'''Run qemu-io and return True if the subprocess returned 0'''
|
||||
args = qemu_io_args + list(args)
|
||||
args = qemu_io_wrap_args(args)
|
||||
result = subprocess.run(args, stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.STDOUT, check=False)
|
||||
return result.returncode == 0
|
||||
|
||||
class QemuIoInteractive:
|
||||
def __init__(self, *args):
|
||||
self.args = qemu_io_args_no_fmt + list(args)
|
||||
self.args = qemu_io_wrap_args(args)
|
||||
# We need to keep the Popen object around, and not
|
||||
# close it immediately. Therefore, disable the pylint check:
|
||||
# pylint: disable=consider-using-with
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
#!/usr/bin/env python3
|
||||
#
|
||||
# Test nbd reconnect on open
|
||||
#
|
||||
# Copyright (c) 2020 Virtuozzo International GmbH
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
import time
|
||||
|
||||
import iotests
|
||||
from iotests import qemu_img_create, file_path, qemu_io_popen, qemu_nbd, \
|
||||
qemu_io_log, log
|
||||
|
||||
iotests.script_initialize(supported_fmts=['qcow2'])
|
||||
|
||||
disk, nbd_sock = file_path('disk', 'nbd-sock')
|
||||
|
||||
|
||||
# Return qemu-io arguments that read the first 1M from an NBD export on the
# unix socket `nbd_sock`, opened with the given `open_timeout` value for the
# nbd driver's open-timeout option.
def create_args(open_timeout):
|
||||
return ['--image-opts', '-c', 'read 0 1M',
|
||||
f'driver=nbd,open-timeout={open_timeout},'
|
||||
f'server.type=unix,server.path={nbd_sock}']
|
||||
|
||||
|
||||
# Run qemu-io against the NBD socket with the given `open_timeout`, log its
# output, and log whether the wall-clock run time fell inside the window
# [open_timeout, open_timeout + 0.2] seconds. Outside that window the log
# line reports the measured time and whether it was too early or too long.
def check_fail_to_connect(open_timeout):
|
||||
log(f'Check fail to connect with {open_timeout} seconds of timeout')
|
||||
|
||||
start_t = time.time()
|
||||
qemu_io_log(*create_args(open_timeout))
|
||||
delta_t = time.time() - start_t
|
||||
|
||||
# 0.2 s of slack on top of the requested timeout
|
||||
max_delta = open_timeout + 0.2
|
||||
if open_timeout <= delta_t <= max_delta:
|
||||
log(f'qemu_io finished in {open_timeout}..{max_delta} seconds, OK')
|
||||
else:
|
||||
note = 'too early' if delta_t < open_timeout else 'too long'
|
||||
log(f'qemu_io finished in {delta_t:.1f} seconds, {note}')
|
||||
|
||||
|
||||
qemu_img_create('-f', iotests.imgfmt, disk, '1M')
|
||||
|
||||
# Start NBD client when NBD server is not yet running. It should not fail, but
|
||||
# wait for 5 seconds for the server to be available.
|
||||
client = qemu_io_popen(*create_args(5))
|
||||
|
||||
time.sleep(1)
|
||||
qemu_nbd('-k', nbd_sock, '-f', iotests.imgfmt, disk)
|
||||
|
||||
# client should succeed
|
||||
log(client.communicate()[0], filters=[iotests.filter_qemu_io])
|
||||
|
||||
# Server was started without --persistent flag, so it should be off now. Let's
|
||||
# check it and at the same time check that with open-timeout=0 client fails
|
||||
# immediately.
|
||||
check_fail_to_connect(0)
|
||||
|
||||
# Check that we will fail after non-zero timeout if server is still unavailable
|
||||
check_fail_to_connect(1)
|
|
@ -0,0 +1,11 @@
|
|||
read 1048576/1048576 bytes at offset 0
|
||||
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
|
||||
Check fail to connect with 0 seconds of timeout
|
||||
qemu-io: can't open: Failed to connect to 'TEST_DIR/PID-nbd-sock': No such file or directory
|
||||
|
||||
qemu_io finished in 0..0.2 seconds, OK
|
||||
Check fail to connect with 1 seconds of timeout
|
||||
qemu-io: can't open: Failed to connect to 'TEST_DIR/PID-nbd-sock': No such file or directory
|
||||
|
||||
qemu_io finished in 1..1.2 seconds, OK
|
Loading…
Reference in New Issue