From b87151a848b0019775bfddb16b79f5adcad3f36c Mon Sep 17 00:00:00 2001 From: Andrey Drobyshev Date: Fri, 8 Sep 2023 01:07:18 +0300 Subject: [PATCH 01/13] qemu-iotests/197: use more generic commands for formats other than qcow2 In the previous commit e2f938265e0 ("tests/qemu-iotests/197: add testcase for CoR with subclusters") we've introduced a new testcase for copy-on-read with subclusters. Test 197 always forces qcow2 as the top image, but allows backing image to be in any format. That last test case didn't meet these requirements, so let's fix it by using more generic "qemu-io -c map" command. Signed-off-by: Andrey Drobyshev Message-ID: <20230907220718.983430-1-andrey.drobyshev@virtuozzo.com> Tested-by: Eric Blake Reviewed-by: Eric Blake Signed-off-by: Eric Blake --- tests/qemu-iotests/197 | 8 ++++---- tests/qemu-iotests/197.out | 18 ++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197 index f07a9da136..8ad2bdb035 100755 --- a/tests/qemu-iotests/197 +++ b/tests/qemu-iotests/197 @@ -136,18 +136,18 @@ IMGPROTO=file IMGFMT=qcow2 TEST_IMG_FILE="$TEST_WRAP" \ $QEMU_IO -c "write -P 0xaa 0 64k" "$TEST_IMG" | _filter_qemu_io # Allocate individual subclusters in the top image, and not the whole cluster -$QEMU_IO -c "write -P 0xbb 28K 2K" -c "write -P 0xcc 34K 2K" "$TEST_WRAP" \ +$QEMU_IO -f qcow2 -c "write -P 0xbb 28K 2K" -c "write -P 0xcc 34K 2K" "$TEST_WRAP" \ | _filter_qemu_io # Only 2 subclusters should be allocated in the top image at this point -$QEMU_IMG map "$TEST_WRAP" | _filter_qemu_img_map +$QEMU_IO -f qcow2 -c map "$TEST_WRAP" # Actual copy-on-read operation -$QEMU_IO -C -c "read -P 0xaa 30K 4K" "$TEST_WRAP" | _filter_qemu_io +$QEMU_IO -f qcow2 -C -c "read -P 0xaa 30K 4K" "$TEST_WRAP" | _filter_qemu_io # And here we should have 4 subclusters allocated right in the middle of the # top image. Make sure the whole cluster remains unallocated -$QEMU_IMG map "$TEST_WRAP" | _filter_qemu_img_map +$QEMU_IO -f qcow2 -c map "$TEST_WRAP" _check_test_img diff --git a/tests/qemu-iotests/197.out b/tests/qemu-iotests/197.out index 8f34a30afe..86c57b51d3 100644 --- a/tests/qemu-iotests/197.out +++ b/tests/qemu-iotests/197.out @@ -42,17 +42,15 @@ wrote 2048/2048 bytes at offset 28672 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 2048/2048 bytes at offset 34816 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Offset Length File -0 0x7000 TEST_DIR/t.IMGFMT -0x7000 0x800 TEST_DIR/t.wrap.IMGFMT -0x7800 0x1000 TEST_DIR/t.IMGFMT -0x8800 0x800 TEST_DIR/t.wrap.IMGFMT -0x9000 0x7000 TEST_DIR/t.IMGFMT +28 KiB (0x7000) bytes not allocated at offset 0 bytes (0x0) +2 KiB (0x800) bytes allocated at offset 28 KiB (0x7000) +4 KiB (0x1000) bytes not allocated at offset 30 KiB (0x7800) +2 KiB (0x800) bytes allocated at offset 34 KiB (0x8800) +28 KiB (0x7000) bytes not allocated at offset 36 KiB (0x9000) read 4096/4096 bytes at offset 30720 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Offset Length File -0 0x7000 TEST_DIR/t.IMGFMT -0x7000 0x2000 TEST_DIR/t.wrap.IMGFMT -0x9000 0x7000 TEST_DIR/t.IMGFMT +28 KiB (0x7000) bytes not allocated at offset 0 bytes (0x0) +8 KiB (0x2000) bytes allocated at offset 28 KiB (0x7000) +28 KiB (0x7000) bytes not allocated at offset 36 KiB (0x9000) No errors were found on the image. *** done From b84ca91ca28d044b2a7371a8ea797c9e75d8abb4 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 30 Aug 2023 18:47:59 -0400 Subject: [PATCH 02/13] nbd: drop unused nbd_receive_negotiate() aio_context argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit aio_context is always NULL, so drop it. Suggested-by: Fabiano Rosas Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20230830224802.493686-2-stefanha@redhat.com> Signed-off-by: Eric Blake --- include/block/nbd.h | 3 +-- nbd/client-connection.c | 3 +-- nbd/client.c | 5 ++--- qemu-nbd.c | 4 ++-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/include/block/nbd.h b/include/block/nbd.h index 4428bcffbb..f672b76173 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -324,8 +324,7 @@ typedef struct NBDExportInfo { char **contexts; } NBDExportInfo; -int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc, - QCryptoTLSCreds *tlscreds, +int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, const char *hostname, QIOChannel **outioc, NBDExportInfo *info, Error **errp); void nbd_free_export_list(NBDExportInfo *info, int count); diff --git a/nbd/client-connection.c b/nbd/client-connection.c index 3d14296c04..aafb3d0fb4 100644 --- a/nbd/client-connection.c +++ b/nbd/client-connection.c @@ -146,8 +146,7 @@ static int nbd_connect(QIOChannelSocket *sioc, SocketAddress *addr, return 0; } - ret = nbd_receive_negotiate(NULL, QIO_CHANNEL(sioc), tlscreds, - tlshostname, + ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), tlscreds, tlshostname, outioc, info, errp); if (ret < 0) { /* diff --git a/nbd/client.c b/nbd/client.c index 479208d5d9..16ec10c8a9 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -1014,8 +1014,7 @@ static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info, * Returns: negative errno: failure talking to server * 0: server is connected */ -int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc, - QCryptoTLSCreds *tlscreds, +int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, const char *hostname, QIOChannel **outioc, NBDExportInfo *info, Error **errp) { @@ -1027,7 +1026,7 @@ int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc, assert(info->name && strlen(info->name) <= NBD_MAX_STRING_SIZE); trace_nbd_receive_negotiate_name(info->name); - result = nbd_start_negotiate(aio_context, ioc, tlscreds, hostname, outioc, + result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, outioc, info->structured_reply, &zeroes, errp); if (result < 0) { return result; diff --git a/qemu-nbd.c b/qemu-nbd.c index aaccaa3318..b47459f781 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -295,8 +295,8 @@ static void *nbd_client_thread(void *arg) goto out; } - if (nbd_receive_negotiate(NULL, QIO_CHANNEL(sioc), - NULL, NULL, NULL, &info, &local_error) < 0) { + if (nbd_receive_negotiate(QIO_CHANNEL(sioc), NULL, NULL, NULL, + &info, &local_error) < 0) { if (local_error) { error_report_err(local_error); } From 078c8adaa61df4fe081660f0c14ce35ddd938de0 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 30 Aug 2023 18:48:00 -0400 Subject: [PATCH 03/13] nbd: drop unused nbd_start_negotiate() aio_context argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit aio_context is always NULL, so drop it. Suggested-by: Fabiano Rosas Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20230830224802.493686-3-stefanha@redhat.com> Signed-off-by: Eric Blake --- nbd/client.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/nbd/client.c b/nbd/client.c index 16ec10c8a9..bd7e200136 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -877,8 +877,7 @@ static int nbd_list_meta_contexts(QIOChannel *ioc, * Returns: negative errno: failure talking to server * non-negative: enum NBDMode describing server abilities */ -static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc, - QCryptoTLSCreds *tlscreds, +static int nbd_start_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, const char *hostname, QIOChannel **outioc, bool structured_reply, bool *zeroes, Error **errp) @@ -946,10 +945,6 @@ static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc, return -EINVAL; } ioc = *outioc; - if (aio_context) { - qio_channel_set_blocking(ioc, false, NULL); - qio_channel_attach_aio_context(ioc, aio_context); - } } else { error_setg(errp, "Server does not support STARTTLS"); return -EINVAL; @@ -1026,7 +1021,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, assert(info->name && strlen(info->name) <= NBD_MAX_STRING_SIZE); trace_nbd_receive_negotiate_name(info->name); - result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, outioc, + result = nbd_start_negotiate(ioc, tlscreds, hostname, outioc, info->structured_reply, &zeroes, errp); if (result < 0) { return result; @@ -1149,7 +1144,7 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, QIOChannel *sioc = NULL; *info = NULL; - result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, &sioc, true, + result = nbd_start_negotiate(ioc, tlscreds, hostname, &sioc, true, NULL, errp); if (tlscreds && sioc) { ioc = sioc; From acd4be64b865e81094c690503b4f39804eb67a0b Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 30 Aug 2023 18:48:01 -0400 Subject: [PATCH 04/13] io: check there are no qio_channel_yield() coroutines during ->finalize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Callers must clean up their coroutines before calling object_unref(OBJECT(ioc)) to prevent an fd handler leak. Add an assertion to check this. This patch is preparation for the fd handler changes that follow. Signed-off-by: Stefan Hajnoczi Reviewed-by: Daniel P. Berrangé Reviewed-by: Eric Blake Message-ID: <20230830224802.493686-4-stefanha@redhat.com> Signed-off-by: Eric Blake --- io/channel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/io/channel.c b/io/channel.c index 72f0066af5..c415f3fc88 100644 --- a/io/channel.c +++ b/io/channel.c @@ -653,6 +653,10 @@ static void qio_channel_finalize(Object *obj) { QIOChannel *ioc = QIO_CHANNEL(obj); + /* Must not have coroutines in qio_channel_yield() */ + assert(!ioc->read_coroutine); + assert(!ioc->write_coroutine); + g_free(ioc->name); #ifdef _WIN32 From 06e0f098d612df79597de58121dadf6f5f375d04 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 30 Aug 2023 18:48:02 -0400 Subject: [PATCH 05/13] io: follow coroutine AioContext in qio_channel_yield() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ongoing QEMU multi-queue block layer effort makes it possible for multiple threads to process I/O in parallel. The nbd block driver is not compatible with the multi-queue block layer yet because QIOChannel cannot be used easily from coroutines running in multiple threads. This series changes the QIOChannel API to make that possible. In the current API, calling qio_channel_attach_aio_context() sets the AioContext where qio_channel_yield() installs an fd handler prior to yielding: qio_channel_attach_aio_context(ioc, my_ctx); ... qio_channel_yield(ioc); // my_ctx is used here ... qio_channel_detach_aio_context(ioc); This API design has limitations: reading and writing must be done in the same AioContext and moving between AioContexts involves a cumbersome sequence of API calls that is not suitable for doing on a per-request basis. There is no fundamental reason why a QIOChannel needs to run within the same AioContext every time qio_channel_yield() is called. QIOChannel only uses the AioContext while inside qio_channel_yield(). The rest of the time, QIOChannel is independent of any AioContext. In the new API, qio_channel_yield() queries the AioContext from the current coroutine using qemu_coroutine_get_aio_context(). There is no need to explicitly attach/detach AioContexts anymore and qio_channel_attach_aio_context() and qio_channel_detach_aio_context() are gone. One coroutine can read from the QIOChannel while another coroutine writes from a different AioContext. This API change allows the nbd block driver to use QIOChannel from any thread. It's important to keep in mind that the block driver already synchronizes QIOChannel access and ensures that two coroutines never read simultaneously or write simultaneously. This patch updates all users of qio_channel_attach_aio_context() to the new API. Most conversions are simple, but vhost-user-server requires a new qemu_coroutine_yield() call to quiesce the vu_client_trip() coroutine when not attached to any AioContext. While the API is has become simpler, there is one wart: QIOChannel has a special case for the iohandler AioContext (used for handlers that must not run in nested event loops). I didn't find an elegant way preserve that behavior, so I added a new API called qio_channel_set_follow_coroutine_ctx(ioc, true|false) for opting in to the new AioContext model. By default QIOChannel uses the iohandler AioHandler. Code that formerly called qio_channel_attach_aio_context() now calls qio_channel_set_follow_coroutine_ctx(ioc, true) once after the QIOChannel is created. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Acked-by: Daniel P. Berrangé Message-ID: <20230830224802.493686-5-stefanha@redhat.com> [eblake: also fix migration/rdma.c] Signed-off-by: Eric Blake --- block/nbd.c | 11 +-- include/io/channel-util.h | 23 ++++++ include/io/channel.h | 69 ++++++++---------- include/qemu/vhost-user-server.h | 1 + io/channel-command.c | 10 ++- io/channel-file.c | 9 ++- io/channel-null.c | 3 +- io/channel-socket.c | 9 ++- io/channel-tls.c | 6 +- io/channel-util.c | 24 +++++++ io/channel.c | 120 ++++++++++++++++++++++--------- migration/channel-block.c | 3 +- migration/rdma.c | 25 +++---- nbd/server.c | 14 +--- scsi/qemu-pr-helper.c | 4 +- util/vhost-user-server.c | 27 +++++-- 16 files changed, 229 insertions(+), 129 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 5322e66166..cc48580df7 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -352,7 +352,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, } qio_channel_set_blocking(s->ioc, false, NULL); - qio_channel_attach_aio_context(s->ioc, bdrv_get_aio_context(bs)); + qio_channel_set_follow_coroutine_ctx(s->ioc, true); /* successfully connected */ WITH_QEMU_LOCK_GUARD(&s->requests_lock) { @@ -397,7 +397,6 @@ static void coroutine_fn GRAPH_RDLOCK nbd_reconnect_attempt(BDRVNBDState *s) /* Finalize previous connection if any */ if (s->ioc) { - qio_channel_detach_aio_context(s->ioc); yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), nbd_yank, s->bs); object_unref(OBJECT(s->ioc)); @@ -2089,10 +2088,6 @@ static void nbd_attach_aio_context(BlockDriverState *bs, * the reconnect_delay_timer cannot be active here. */ assert(!s->reconnect_delay_timer); - - if (s->ioc) { - qio_channel_attach_aio_context(s->ioc, new_context); - } } static void nbd_detach_aio_context(BlockDriverState *bs) @@ -2101,10 +2096,6 @@ static void nbd_detach_aio_context(BlockDriverState *bs) assert(!s->open_timer); assert(!s->reconnect_delay_timer); - - if (s->ioc) { - qio_channel_detach_aio_context(s->ioc); - } } static BlockDriver bdrv_nbd = { diff --git a/include/io/channel-util.h b/include/io/channel-util.h index a5d720d9a0..fa18a3756d 100644 --- a/include/io/channel-util.h +++ b/include/io/channel-util.h @@ -49,4 +49,27 @@ QIOChannel *qio_channel_new_fd(int fd, Error **errp); +/** + * qio_channel_util_set_aio_fd_handler: + * @read_fd: the file descriptor for the read handler + * @read_ctx: the AioContext for the read handler + * @io_read: the read handler + * @write_fd: the file descriptor for the write handler + * @write_ctx: the AioContext for the write handler + * @io_write: the write handler + * @opaque: the opaque argument to the read and write handler + * + * Set the read and write handlers when @read_ctx and @write_ctx are non-NULL, + * respectively. To leave a handler in its current state, pass a NULL + * AioContext. To clear a handler, pass a non-NULL AioContext and a NULL + * handler. + */ +void qio_channel_util_set_aio_fd_handler(int read_fd, + AioContext *read_ctx, + IOHandler *io_read, + int write_fd, + AioContext *write_ctx, + IOHandler *io_write, + void *opaque); + #endif /* QIO_CHANNEL_UTIL_H */ diff --git a/include/io/channel.h b/include/io/channel.h index 229bf36910..5f9dbaab65 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -81,9 +81,11 @@ struct QIOChannel { Object parent; unsigned int features; /* bitmask of QIOChannelFeatures */ char *name; - AioContext *ctx; + AioContext *read_ctx; Coroutine *read_coroutine; + AioContext *write_ctx; Coroutine *write_coroutine; + bool follow_coroutine_ctx; #ifdef _WIN32 HANDLE event; /* For use with GSource on Win32 */ #endif @@ -140,8 +142,9 @@ struct QIOChannelClass { int whence, Error **errp); void (*io_set_aio_fd_handler)(QIOChannel *ioc, - AioContext *ctx, + AioContext *read_ctx, IOHandler *io_read, + AioContext *write_ctx, IOHandler *io_write, void *opaque); int (*io_flush)(QIOChannel *ioc, @@ -498,6 +501,21 @@ int qio_channel_set_blocking(QIOChannel *ioc, bool enabled, Error **errp); +/** + * qio_channel_set_follow_coroutine_ctx: + * @ioc: the channel object + * @enabled: whether or not to follow the coroutine's AioContext + * + * If @enabled is true, calls to qio_channel_yield() use the current + * coroutine's AioContext. Usually this is desirable. + * + * If @enabled is false, calls to qio_channel_yield() use the global iohandler + * AioContext. This is may be used by coroutines that run in the main loop and + * do not wish to respond to I/O during nested event loops. This is the + * default for compatibility with code that is not aware of AioContexts. + */ +void qio_channel_set_follow_coroutine_ctx(QIOChannel *ioc, bool enabled); + /** * qio_channel_close: * @ioc: the channel object @@ -703,41 +721,6 @@ GSource *qio_channel_add_watch_source(QIOChannel *ioc, GDestroyNotify notify, GMainContext *context); -/** - * qio_channel_attach_aio_context: - * @ioc: the channel object - * @ctx: the #AioContext to set the handlers on - * - * Request that qio_channel_yield() sets I/O handlers on - * the given #AioContext. If @ctx is %NULL, qio_channel_yield() - * uses QEMU's main thread event loop. - * - * You can move a #QIOChannel from one #AioContext to another even if - * I/O handlers are set for a coroutine. However, #QIOChannel provides - * no synchronization between the calls to qio_channel_yield() and - * qio_channel_attach_aio_context(). - * - * Therefore you should first call qio_channel_detach_aio_context() - * to ensure that the coroutine is not entered concurrently. Then, - * while the coroutine has yielded, call qio_channel_attach_aio_context(), - * and then aio_co_schedule() to place the coroutine on the new - * #AioContext. The calls to qio_channel_detach_aio_context() - * and qio_channel_attach_aio_context() should be protected with - * aio_context_acquire() and aio_context_release(). - */ -void qio_channel_attach_aio_context(QIOChannel *ioc, - AioContext *ctx); - -/** - * qio_channel_detach_aio_context: - * @ioc: the channel object - * - * Disable any I/O handlers set by qio_channel_yield(). With the - * help of aio_co_schedule(), this allows moving a coroutine that was - * paused by qio_channel_yield() to another context. - */ -void qio_channel_detach_aio_context(QIOChannel *ioc); - /** * qio_channel_yield: * @ioc: the channel object @@ -785,8 +768,9 @@ void qio_channel_wait(QIOChannel *ioc, /** * qio_channel_set_aio_fd_handler: * @ioc: the channel object - * @ctx: the AioContext to set the handlers on + * @read_ctx: the AioContext to set the read handler on or NULL * @io_read: the read handler + * @write_ctx: the AioContext to set the write handler on or NULL * @io_write: the write handler * @opaque: the opaque value passed to the handler * @@ -794,10 +778,17 @@ void qio_channel_wait(QIOChannel *ioc, * be used by channel implementations to forward the handlers * to another channel (e.g. from #QIOChannelTLS to the * underlying socket). + * + * When @read_ctx is NULL, don't touch the read handler. When @write_ctx is + * NULL, don't touch the write handler. Note that setting the read handler + * clears the write handler, and vice versa, if they share the same AioContext. + * Therefore the caller must pass both handlers together when sharing the same + * AioContext. */ void qio_channel_set_aio_fd_handler(QIOChannel *ioc, - AioContext *ctx, + AioContext *read_ctx, IOHandler *io_read, + AioContext *write_ctx, IOHandler *io_write, void *opaque); diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h index b1c1cda886..64ad701015 100644 --- a/include/qemu/vhost-user-server.h +++ b/include/qemu/vhost-user-server.h @@ -43,6 +43,7 @@ typedef struct { unsigned int in_flight; /* atomic */ /* Protected by ctx lock */ + bool in_qio_channel_yield; bool wait_idle; VuDev vu_dev; QIOChannel *ioc; /* The I/O channel with the client */ diff --git a/io/channel-command.c b/io/channel-command.c index 7ed726c802..6d5f64e146 100644 --- a/io/channel-command.c +++ b/io/channel-command.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "io/channel-command.h" +#include "io/channel-util.h" #include "io/channel-watch.h" #include "qapi/error.h" #include "qemu/module.h" @@ -331,14 +332,17 @@ static int qio_channel_command_close(QIOChannel *ioc, static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc, - AioContext *ctx, + AioContext *read_ctx, IOHandler *io_read, + AioContext *write_ctx, IOHandler *io_write, void *opaque) { QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); - aio_set_fd_handler(ctx, cioc->readfd, io_read, NULL, NULL, NULL, opaque); - aio_set_fd_handler(ctx, cioc->writefd, NULL, io_write, NULL, NULL, opaque); + + qio_channel_util_set_aio_fd_handler(cioc->readfd, read_ctx, io_read, + cioc->writefd, write_ctx, io_write, + opaque); } diff --git a/io/channel-file.c b/io/channel-file.c index 8b5821f452..4a12c61886 100644 --- a/io/channel-file.c +++ b/io/channel-file.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "io/channel-file.h" +#include "io/channel-util.h" #include "io/channel-watch.h" #include "qapi/error.h" #include "qemu/module.h" @@ -192,13 +193,17 @@ static int qio_channel_file_close(QIOChannel *ioc, static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc, - AioContext *ctx, + AioContext *read_ctx, IOHandler *io_read, + AioContext *write_ctx, IOHandler *io_write, void *opaque) { QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); - aio_set_fd_handler(ctx, fioc->fd, io_read, io_write, NULL, NULL, opaque); + + qio_channel_util_set_aio_fd_handler(fioc->fd, read_ctx, io_read, + fioc->fd, write_ctx, io_write, + opaque); } static GSource *qio_channel_file_create_watch(QIOChannel *ioc, diff --git a/io/channel-null.c b/io/channel-null.c index 4fafdb770d..ef99586348 100644 --- a/io/channel-null.c +++ b/io/channel-null.c @@ -128,8 +128,9 @@ qio_channel_null_close(QIOChannel *ioc, static void qio_channel_null_set_aio_fd_handler(QIOChannel *ioc G_GNUC_UNUSED, - AioContext *ctx G_GNUC_UNUSED, + AioContext *read_ctx G_GNUC_UNUSED, IOHandler *io_read G_GNUC_UNUSED, + AioContext *write_ctx G_GNUC_UNUSED, IOHandler *io_write G_GNUC_UNUSED, void *opaque G_GNUC_UNUSED) { diff --git a/io/channel-socket.c b/io/channel-socket.c index d99945ebec..02ffb51e99 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -22,6 +22,7 @@ #include "qapi/qapi-visit-sockets.h" #include "qemu/module.h" #include "io/channel-socket.h" +#include "io/channel-util.h" #include "io/channel-watch.h" #include "trace.h" #include "qapi/clone-visitor.h" @@ -893,13 +894,17 @@ qio_channel_socket_shutdown(QIOChannel *ioc, } static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc, - AioContext *ctx, + AioContext *read_ctx, IOHandler *io_read, + AioContext *write_ctx, IOHandler *io_write, void *opaque) { QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); - aio_set_fd_handler(ctx, sioc->fd, io_read, io_write, NULL, NULL, opaque); + + qio_channel_util_set_aio_fd_handler(sioc->fd, read_ctx, io_read, + sioc->fd, write_ctx, io_write, + opaque); } static GSource *qio_channel_socket_create_watch(QIOChannel *ioc, diff --git a/io/channel-tls.c b/io/channel-tls.c index 847d5297c3..58fe1aceee 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c @@ -388,14 +388,16 @@ static int qio_channel_tls_close(QIOChannel *ioc, } static void qio_channel_tls_set_aio_fd_handler(QIOChannel *ioc, - AioContext *ctx, + AioContext *read_ctx, IOHandler *io_read, + AioContext *write_ctx, IOHandler *io_write, void *opaque) { QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); - qio_channel_set_aio_fd_handler(tioc->master, ctx, io_read, io_write, opaque); + qio_channel_set_aio_fd_handler(tioc->master, read_ctx, io_read, + write_ctx, io_write, opaque); } typedef struct QIOChannelTLSSource QIOChannelTLSSource; diff --git a/io/channel-util.c b/io/channel-util.c index 848a7a43d6..4b340d46d7 100644 --- a/io/channel-util.c +++ b/io/channel-util.c @@ -36,3 +36,27 @@ QIOChannel *qio_channel_new_fd(int fd, } return ioc; } + + +void qio_channel_util_set_aio_fd_handler(int read_fd, + AioContext *read_ctx, + IOHandler *io_read, + int write_fd, + AioContext *write_ctx, + IOHandler *io_write, + void *opaque) +{ + if (read_fd == write_fd && read_ctx == write_ctx) { + aio_set_fd_handler(read_ctx, read_fd, io_read, io_write, + NULL, NULL, opaque); + } else { + if (read_ctx) { + aio_set_fd_handler(read_ctx, read_fd, io_read, NULL, + NULL, NULL, opaque); + } + if (write_ctx) { + aio_set_fd_handler(write_ctx, write_fd, NULL, io_write, + NULL, NULL, opaque); + } + } +} diff --git a/io/channel.c b/io/channel.c index c415f3fc88..86c5834510 100644 --- a/io/channel.c +++ b/io/channel.c @@ -365,6 +365,12 @@ int qio_channel_set_blocking(QIOChannel *ioc, } +void qio_channel_set_follow_coroutine_ctx(QIOChannel *ioc, bool enabled) +{ + ioc->follow_coroutine_ctx = enabled; +} + + int qio_channel_close(QIOChannel *ioc, Error **errp) { @@ -388,14 +394,16 @@ GSource *qio_channel_create_watch(QIOChannel *ioc, void qio_channel_set_aio_fd_handler(QIOChannel *ioc, - AioContext *ctx, + AioContext *read_ctx, IOHandler *io_read, + AioContext *write_ctx, IOHandler *io_write, void *opaque) { QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); - klass->io_set_aio_fd_handler(ioc, ctx, io_read, io_write, opaque); + klass->io_set_aio_fd_handler(ioc, read_ctx, io_read, write_ctx, io_write, + opaque); } guint qio_channel_add_watch_full(QIOChannel *ioc, @@ -542,56 +550,101 @@ static void qio_channel_restart_write(void *opaque) aio_co_wake(co); } -static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc) +static void coroutine_fn +qio_channel_set_fd_handlers(QIOChannel *ioc, GIOCondition condition) { - IOHandler *rd_handler = NULL, *wr_handler = NULL; + AioContext *ctx = ioc->follow_coroutine_ctx ? + qemu_coroutine_get_aio_context(qemu_coroutine_self()) : + iohandler_get_aio_context(); + AioContext *read_ctx = NULL; + IOHandler *io_read = NULL; + AioContext *write_ctx = NULL; + IOHandler *io_write = NULL; + + if (condition == G_IO_IN) { + ioc->read_coroutine = qemu_coroutine_self(); + ioc->read_ctx = ctx; + read_ctx = ctx; + io_read = qio_channel_restart_read; + + /* + * Thread safety: if the other coroutine is set and its AioContext + * matches ours, then there is mutual exclusion between read and write + * because they share a single thread and it's safe to set both read + * and write fd handlers here. If the AioContext does not match ours, + * then both threads may run in parallel but there is no shared state + * to worry about. + */ + if (ioc->write_coroutine && ioc->write_ctx == ctx) { + write_ctx = ctx; + io_write = qio_channel_restart_write; + } + } else if (condition == G_IO_OUT) { + ioc->write_coroutine = qemu_coroutine_self(); + ioc->write_ctx = ctx; + write_ctx = ctx; + io_write = qio_channel_restart_write; + if (ioc->read_coroutine && ioc->read_ctx == ctx) { + read_ctx = ctx; + io_read = qio_channel_restart_read; + } + } else { + abort(); + } + + qio_channel_set_aio_fd_handler(ioc, read_ctx, io_read, + write_ctx, io_write, ioc); +} + +static void coroutine_fn +qio_channel_clear_fd_handlers(QIOChannel *ioc, GIOCondition condition) +{ + AioContext *read_ctx = NULL; + IOHandler *io_read = NULL; + AioContext *write_ctx = NULL; + IOHandler *io_write = NULL; AioContext *ctx; - if (ioc->read_coroutine) { - rd_handler = qio_channel_restart_read; - } - if (ioc->write_coroutine) { - wr_handler = qio_channel_restart_write; + if (condition == G_IO_IN) { + ctx = ioc->read_ctx; + read_ctx = ctx; + io_read = NULL; + if (ioc->write_coroutine && ioc->write_ctx == ctx) { + write_ctx = ctx; + io_write = qio_channel_restart_write; + } + } else if (condition == G_IO_OUT) { + ctx = ioc->write_ctx; + write_ctx = ctx; + io_write = NULL; + if (ioc->read_coroutine && ioc->read_ctx == ctx) { + read_ctx = ctx; + io_read = qio_channel_restart_read; + } + } else { + abort(); } - ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context(); - qio_channel_set_aio_fd_handler(ioc, ctx, rd_handler, wr_handler, ioc); -} - -void qio_channel_attach_aio_context(QIOChannel *ioc, - AioContext *ctx) -{ - assert(!ioc->read_coroutine); - assert(!ioc->write_coroutine); - ioc->ctx = ctx; -} - -void qio_channel_detach_aio_context(QIOChannel *ioc) -{ - ioc->read_coroutine = NULL; - ioc->write_coroutine = NULL; - qio_channel_set_aio_fd_handlers(ioc); - ioc->ctx = NULL; + qio_channel_set_aio_fd_handler(ioc, read_ctx, io_read, + write_ctx, io_write, ioc); } void coroutine_fn qio_channel_yield(QIOChannel *ioc, GIOCondition condition) { - AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context(); + AioContext *ioc_ctx; assert(qemu_in_coroutine()); - assert(in_aio_context_home_thread(ioc_ctx)); + ioc_ctx = qemu_coroutine_get_aio_context(qemu_coroutine_self()); if (condition == G_IO_IN) { assert(!ioc->read_coroutine); - ioc->read_coroutine = qemu_coroutine_self(); } else if (condition == G_IO_OUT) { assert(!ioc->write_coroutine); - ioc->write_coroutine = qemu_coroutine_self(); } else { abort(); } - qio_channel_set_aio_fd_handlers(ioc); + qio_channel_set_fd_handlers(ioc, condition); qemu_coroutine_yield(); assert(in_aio_context_home_thread(ioc_ctx)); @@ -599,11 +652,10 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, * through the aio_fd_handlers. */ if (condition == G_IO_IN) { assert(ioc->read_coroutine == NULL); - qio_channel_set_aio_fd_handlers(ioc); } else if (condition == G_IO_OUT) { assert(ioc->write_coroutine == NULL); - qio_channel_set_aio_fd_handlers(ioc); } + qio_channel_clear_fd_handlers(ioc, condition); } void qio_channel_wake_read(QIOChannel *ioc) diff --git a/migration/channel-block.c b/migration/channel-block.c index b7374363c3..fff8d87094 100644 --- a/migration/channel-block.c +++ b/migration/channel-block.c @@ -158,8 +158,9 @@ qio_channel_block_close(QIOChannel *ioc, static void qio_channel_block_set_aio_fd_handler(QIOChannel *ioc, - AioContext *ctx, + AioContext *read_ctx, IOHandler *io_read, + AioContext *write_ctx, IOHandler *io_write, void *opaque) { diff --git a/migration/rdma.c b/migration/rdma.c index ca430d319d..a2a3db35b1 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -3103,22 +3103,23 @@ static GSource *qio_channel_rdma_create_watch(QIOChannel *ioc, } static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc, - AioContext *ctx, - IOHandler *io_read, - IOHandler *io_write, - void *opaque) + AioContext *read_ctx, + IOHandler *io_read, + AioContext *write_ctx, + IOHandler *io_write, + void *opaque) { QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); if (io_read) { - aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd, io_read, - io_write, NULL, NULL, opaque); - aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd, io_read, - io_write, NULL, NULL, opaque); + aio_set_fd_handler(read_ctx, rioc->rdmain->recv_comp_channel->fd, + io_read, io_write, NULL, NULL, opaque); + aio_set_fd_handler(read_ctx, rioc->rdmain->send_comp_channel->fd, + io_read, io_write, NULL, NULL, opaque); } else { - aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd, io_read, - io_write, NULL, NULL, opaque); - aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd, io_read, - io_write, NULL, NULL, opaque); + aio_set_fd_handler(write_ctx, rioc->rdmaout->recv_comp_channel->fd, + io_read, io_write, NULL, NULL, opaque); + aio_set_fd_handler(write_ctx, rioc->rdmaout->send_comp_channel->fd, + io_read, io_write, NULL, NULL, opaque); } } diff --git a/nbd/server.c b/nbd/server.c index 8486b64b15..b5f93a20c9 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1333,6 +1333,7 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) */ qio_channel_set_blocking(client->ioc, false, NULL); + qio_channel_set_follow_coroutine_ctx(client->ioc, true); trace_nbd_negotiate_begin(); memcpy(buf, "NBDMAGIC", 8); @@ -1352,11 +1353,6 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) return ret; } - /* Attach the channel to the same AioContext as the export */ - if (client->exp && client->exp->common.ctx) { - qio_channel_attach_aio_context(client->ioc, client->exp->common.ctx); - } - assert(!client->optlen); trace_nbd_negotiate_success(); @@ -1465,7 +1461,6 @@ void nbd_client_put(NBDClient *client) */ assert(client->closing); - qio_channel_detach_aio_context(client->ioc); object_unref(OBJECT(client->sioc)); object_unref(OBJECT(client->ioc)); if (client->tlscreds) { @@ -1544,8 +1539,6 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) exp->common.ctx = ctx; QTAILQ_FOREACH(client, &exp->clients, next) { - qio_channel_attach_aio_context(client->ioc, ctx); - assert(client->nb_requests == 0); assert(client->recv_coroutine == NULL); assert(client->send_coroutine == NULL); @@ -1555,14 +1548,9 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) static void blk_aio_detach(void *opaque) { NBDExport *exp = opaque; - NBDClient *client; trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); - QTAILQ_FOREACH(client, &exp->clients, next) { - qio_channel_detach_aio_context(client->ioc); - } - exp->common.ctx = NULL; } diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c index ae44a816e1..c6c6347e9b 100644 --- a/scsi/qemu-pr-helper.c +++ b/scsi/qemu-pr-helper.c @@ -735,8 +735,7 @@ static void coroutine_fn prh_co_entry(void *opaque) qio_channel_set_blocking(QIO_CHANNEL(client->ioc), false, NULL); - qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), - qemu_get_aio_context()); + qio_channel_set_follow_coroutine_ctx(QIO_CHANNEL(client->ioc), true); /* A very simple negotiation for future extensibility. No features * are defined so write 0. @@ -796,7 +795,6 @@ static void coroutine_fn prh_co_entry(void *opaque) } out: - qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc)); object_unref(OBJECT(client->ioc)); g_free(client); } diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c index cd17fb5326..b4b6bf30a2 100644 --- a/util/vhost-user-server.c +++ b/util/vhost-user-server.c @@ -127,7 +127,14 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) if (rc < 0) { if (rc == QIO_CHANNEL_ERR_BLOCK) { assert(local_err == NULL); - qio_channel_yield(ioc, G_IO_IN); + if (server->ctx) { + server->in_qio_channel_yield = true; + qio_channel_yield(ioc, G_IO_IN); + server->in_qio_channel_yield = false; + } else { + /* Wait until attached to an AioContext again */ + qemu_coroutine_yield(); + } continue; } else { error_report_err(local_err); @@ -278,7 +285,7 @@ set_watch(VuDev *vu_dev, int fd, int vu_evt, vu_fd_watch->fd = fd; vu_fd_watch->cb = cb; qemu_socket_set_nonblock(fd); - aio_set_fd_handler(server->ioc->ctx, fd, kick_handler, + aio_set_fd_handler(server->ctx, fd, kick_handler, NULL, NULL, NULL, vu_fd_watch); vu_fd_watch->vu_dev = vu_dev; vu_fd_watch->pvt = pvt; @@ -299,7 +306,7 @@ static void remove_watch(VuDev *vu_dev, int fd) if (!vu_fd_watch) { return; } - aio_set_fd_handler(server->ioc->ctx, fd, NULL, NULL, NULL, NULL, NULL); + aio_set_fd_handler(server->ctx, fd, NULL, NULL, NULL, NULL, NULL); QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next); g_free(vu_fd_watch); @@ -344,6 +351,8 @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc, /* TODO vu_message_write() spins if non-blocking! */ qio_channel_set_blocking(server->ioc, false, NULL); + qio_channel_set_follow_coroutine_ctx(server->ioc, true); + server->co_trip = qemu_coroutine_create(vu_client_trip, server); aio_context_acquire(server->ctx); @@ -399,13 +408,12 @@ void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx) return; } - qio_channel_attach_aio_context(server->ioc, ctx); - QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) { aio_set_fd_handler(ctx, vu_fd_watch->fd, kick_handler, NULL, NULL, NULL, vu_fd_watch); } + assert(!server->in_qio_channel_yield); aio_co_schedule(ctx, server->co_trip); } @@ -419,11 +427,16 @@ void vhost_user_server_detach_aio_context(VuServer *server) aio_set_fd_handler(server->ctx, vu_fd_watch->fd, NULL, NULL, NULL, NULL, vu_fd_watch); } - - qio_channel_detach_aio_context(server->ioc); } server->ctx = NULL; + + if (server->ioc) { + if (server->in_qio_channel_yield) { + /* Stop receiving the next vhost-user message */ + qio_channel_wake_read(server->ioc); + } + } } bool vhost_user_server_start(VuServer *server, From 522a9b94e0f8a1f89f1660a46121ab0d0eae3593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Thu, 24 Aug 2023 17:47:06 +0100 Subject: [PATCH 06/13] util/iov: Avoid dynamic stack allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use autofree heap allocation instead of variable-length array on the stack. The codebase has very few VLAs, and if we can get rid of them all we can make the compiler error on new additions. This is a defensive measure against security bugs where an on-stack dynamic allocation isn't correctly size-checked (e.g. CVE-2021-3527). Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: Peter Maydell Message-ID: <20230824164706.2652277-1-peter.maydell@linaro.org> Reviewed-by: Eric Blake Signed-off-by: Eric Blake --- util/iov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/iov.c b/util/iov.c index 866fb577f3..7e73948f5e 100644 --- a/util/iov.c +++ b/util/iov.c @@ -571,7 +571,7 @@ static int sortelem_cmp_src_index(const void *a, const void *b) */ void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf) { - IOVectorSortElem sortelems[src->niov]; + g_autofree IOVectorSortElem *sortelems = g_new(IOVectorSortElem, src->niov); void *last_end; int i; From b4bbdf51e3af2ac7a8dd269bcdadd11523978dda Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 6 Sep 2023 11:32:03 +0200 Subject: [PATCH 07/13] qemu-nbd: improve error message for dup2 error This error happens if we are not able to close the pipe to the parent (to trace errors in the child process) and assign stderr to /dev/null as required by the daemonizing convention. Signed-off-by: Denis V. Lunev Suggested-by: Eric Blake CC: Eric Blake CC: Vladimir Sementsov-Ogievskiy Message-ID: <20230906093210.339585-2-den@openvz.org> Reviewed-by: Eric Blake [eblake: commit message grammar] Signed-off-by: Eric Blake --- qemu-nbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index b47459f781..e2480061a1 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -324,7 +324,7 @@ static void *nbd_client_thread(void *arg) } else { /* Close stderr so that the qemu-nbd process exits. */ if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) { - error_report("Could not set stderr to /dev/null: %s", + error_report("Could not release pipe to parent: %s", strerror(errno)); exit(EXIT_FAILURE); } @@ -1181,7 +1181,7 @@ int main(int argc, char **argv) if (fork_process) { if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) { - error_report("Could not set stderr to /dev/null: %s", + error_report("Could not release pipe to parent: %s", strerror(errno)); exit(EXIT_FAILURE); } From 3484f6769cbf9a2ebd47e316a6c39bf42fca0dcc Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 6 Sep 2023 11:32:04 +0200 Subject: [PATCH 08/13] qemu-nbd: define struct NbdClientOpts when HAVE_NBD_DEVICE is not defined This patch also drops definition of some locals in main() to avoid useless data copy. Signed-off-by: Denis V. Lunev CC: Eric Blake CC: Vladimir Sementsov-Ogievskiy Message-ID: <20230906093210.339585-3-den@openvz.org> Reviewed-by: Eric Blake Signed-off-by: Eric Blake --- qemu-nbd.c | 60 ++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index e2480061a1..acbdc0cd8f 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -253,6 +253,12 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, } +struct NbdClientOpts { + char *device; + bool fork_process; + bool verbose; +}; + #if HAVE_NBD_DEVICE static void *show_parts(void *arg) { @@ -271,12 +277,6 @@ static void *show_parts(void *arg) return NULL; } -struct NbdClientOpts { - char *device; - bool fork_process; - bool verbose; -}; - static void *nbd_client_thread(void *arg) { struct NbdClientOpts *opts = arg; @@ -519,7 +519,6 @@ int main(int argc, char **argv) const char *bindto = NULL; const char *port = NULL; char *sockpath = NULL; - char *device = NULL; QemuOpts *sn_opts = NULL; const char *sn_id_or_name = NULL; const char *sopt = "hVb:o:p:rsnc:dvk:e:f:tl:x:T:D:AB:L"; @@ -582,16 +581,16 @@ int main(int argc, char **argv) const char *tlshostname = NULL; bool imageOpts = false; bool writethrough = false; /* Client will flush as needed. */ - bool verbose = false; - bool fork_process = false; bool list = false; unsigned socket_activation; const char *pid_file_name = NULL; const char *selinux_label = NULL; BlockExportOptions *export_opts; -#if HAVE_NBD_DEVICE - struct NbdClientOpts opts; -#endif + struct NbdClientOpts opts = { + .fork_process = false, + .verbose = false, + .device = NULL, + }; #ifdef CONFIG_POSIX os_setup_early_signal_handling(); @@ -719,7 +718,7 @@ int main(int argc, char **argv) disconnect = true; break; case 'c': - device = optarg; + opts.device = optarg; break; case 'e': if (qemu_strtoi(optarg, NULL, 0, &shared) < 0 || @@ -750,7 +749,7 @@ int main(int argc, char **argv) } break; case 'v': - verbose = true; + opts.verbose = true; break; case 'V': version(argv[0]); @@ -782,7 +781,7 @@ int main(int argc, char **argv) tlsauthz = optarg; break; case QEMU_NBD_OPT_FORK: - fork_process = true; + opts.fork_process = true; break; case 'L': list = true; @@ -802,12 +801,12 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } if (export_name || export_description || dev_offset || - device || disconnect || fmt || sn_id_or_name || bitmaps || + opts.device || disconnect || fmt || sn_id_or_name || bitmaps || alloc_depth || seen_aio || seen_discard || seen_cache) { error_report("List mode is incompatible with per-device settings"); exit(EXIT_FAILURE); } - if (fork_process) { + if (opts.fork_process) { error_report("List mode is incompatible with forking"); exit(EXIT_FAILURE); } @@ -832,7 +831,8 @@ int main(int argc, char **argv) } } else { /* Using socket activation - check user didn't use -p etc. */ - const char *err_msg = socket_activation_validate_opts(device, sockpath, + const char *err_msg = socket_activation_validate_opts(opts.device, + sockpath, bindto, port, selinux_label, list); @@ -850,7 +850,7 @@ int main(int argc, char **argv) } if (tlscredsid) { - if (device) { + if (opts.device) { error_report("TLS is not supported with a host device"); exit(EXIT_FAILURE); } @@ -880,7 +880,7 @@ int main(int argc, char **argv) if (selinux_label) { #ifdef CONFIG_SELINUX - if (sockpath == NULL && device == NULL) { + if (sockpath == NULL && opts.device == NULL) { error_report("--selinux-label is not permitted without --socket"); exit(EXIT_FAILURE); } @@ -897,7 +897,7 @@ int main(int argc, char **argv) } #if !HAVE_NBD_DEVICE - if (disconnect || device) { + if (disconnect || opts.device) { error_report("Kernel /dev/nbdN support not available"); exit(EXIT_FAILURE); } @@ -919,7 +919,7 @@ int main(int argc, char **argv) } #endif - if ((device && !verbose) || fork_process) { + if ((opts.device && !opts.verbose) || opts.fork_process) { #ifndef WIN32 g_autoptr(GError) err = NULL; int stderr_fd[2]; @@ -1002,9 +1002,9 @@ int main(int argc, char **argv) #endif /* WIN32 */ } - if (device != NULL && sockpath == NULL) { + if (opts.device != NULL && sockpath == NULL) { sockpath = g_malloc(128); - snprintf(sockpath, 128, SOCKET_PATH, basename(device)); + snprintf(sockpath, 128, SOCKET_PATH, basename(opts.device)); } server = qio_net_listener_new(); @@ -1145,15 +1145,9 @@ int main(int argc, char **argv) blk_exp_add(export_opts, &error_fatal); qapi_free_BlockExportOptions(export_opts); - if (device) { + if (opts.device) { #if HAVE_NBD_DEVICE int ret; - opts = (struct NbdClientOpts) { - .device = device, - .fork_process = fork_process, - .verbose = verbose, - }; - ret = pthread_create(&client_thread, NULL, nbd_client_thread, &opts); if (ret != 0) { error_report("Failed to create client thread: %s", strerror(ret)); @@ -1179,7 +1173,7 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - if (fork_process) { + if (opts.fork_process) { if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) { error_report("Could not release pipe to parent: %s", strerror(errno)); @@ -1203,7 +1197,7 @@ int main(int argc, char **argv) qemu_opts_del(sn_opts); - if (device) { + if (opts.device) { void *ret; pthread_join(client_thread, &ret); exit(ret != NULL); From b18d72d7230dcd18919af820b52f6c0f1b2512ca Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 6 Sep 2023 11:32:05 +0200 Subject: [PATCH 09/13] qemu-nbd: move srcpath into struct NbdClientOpts We pass other parameters into nbd_client_thread() in this way. This patch makes the code more consistent. Signed-off-by: Denis V. Lunev CC: Eric Blake CC: Vladimir Sementsov-Ogievskiy Message-ID: <20230906093210.339585-4-den@openvz.org> Reviewed-by: Eric Blake [eblake: Note that this also cleans up a -Wshadow issue, first introduced in e5b815b0] Signed-off-by: Eric Blake --- qemu-nbd.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index acbdc0cd8f..16c59424f1 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -73,7 +73,6 @@ #define MBR_SIZE 512 -static char *srcpath; static SocketAddress *saddr; static int persistent = 0; static enum { RUNNING, TERMINATE, TERMINATED } state; @@ -255,6 +254,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, struct NbdClientOpts { char *device; + char *srcpath; bool fork_process; bool verbose; }; @@ -320,7 +320,7 @@ static void *nbd_client_thread(void *arg) if (opts->verbose && !opts->fork_process) { fprintf(stderr, "NBD device %s is now connected to %s\n", - opts->device, srcpath); + opts->device, opts->srcpath); } else { /* Close stderr so that the qemu-nbd process exits. */ if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) { @@ -590,6 +590,7 @@ int main(int argc, char **argv) .fork_process = false, .verbose = false, .device = NULL, + .srcpath = NULL, }; #ifdef CONFIG_POSIX @@ -1059,19 +1060,19 @@ int main(int argc, char **argv) bdrv_init(); atexit(qemu_nbd_shutdown); - srcpath = argv[optind]; + opts.srcpath = argv[optind]; if (imageOpts) { - QemuOpts *opts; + QemuOpts *o; if (fmt) { error_report("--image-opts and -f are mutually exclusive"); exit(EXIT_FAILURE); } - opts = qemu_opts_parse_noisily(&file_opts, srcpath, true); - if (!opts) { + o = qemu_opts_parse_noisily(&file_opts, opts.srcpath, true); + if (!o) { qemu_opts_reset(&file_opts); exit(EXIT_FAILURE); } - options = qemu_opts_to_qdict(opts, NULL); + options = qemu_opts_to_qdict(o, NULL); qemu_opts_reset(&file_opts); blk = blk_new_open(NULL, NULL, options, flags, &local_err); } else { @@ -1079,7 +1080,7 @@ int main(int argc, char **argv) options = qdict_new(); qdict_put_str(options, "driver", fmt); } - blk = blk_new_open(srcpath, NULL, options, flags, &local_err); + blk = blk_new_open(opts.srcpath, NULL, options, flags, &local_err); } if (!blk) { From b2cecdfee3e68297a4de26aa3188d4025bdef4ca Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 6 Sep 2023 11:32:06 +0200 Subject: [PATCH 10/13] qemu-nbd: put saddr into into struct NbdClientOpts We pass other parameters into nbd_client_thread() in this way. This patch makes the code more consistent. Signed-off-by: Denis V. Lunev CC: Eric Blake CC: Vladimir Sementsov-Ogievskiy Message-ID: <20230906093210.339585-5-den@openvz.org> Reviewed-by: Eric Blake Signed-off-by: Eric Blake --- qemu-nbd.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index 16c59424f1..86bb2f04e2 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -73,7 +73,6 @@ #define MBR_SIZE 512 -static SocketAddress *saddr; static int persistent = 0; static enum { RUNNING, TERMINATE, TERMINATED } state; static int shared = 1; @@ -255,6 +254,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, struct NbdClientOpts { char *device; char *srcpath; + SocketAddress *saddr; bool fork_process; bool verbose; }; @@ -289,7 +289,7 @@ static void *nbd_client_thread(void *arg) sioc = qio_channel_socket_new(); if (qio_channel_socket_connect_sync(sioc, - saddr, + opts->saddr, &local_error) < 0) { error_report_err(local_error); goto out; @@ -591,6 +591,7 @@ int main(int argc, char **argv) .verbose = false, .device = NULL, .srcpath = NULL, + .saddr = NULL, }; #ifdef CONFIG_POSIX @@ -892,8 +893,8 @@ int main(int argc, char **argv) } if (list) { - saddr = nbd_build_socket_address(sockpath, bindto, port); - return qemu_nbd_client_list(saddr, tlscreds, + opts.saddr = nbd_build_socket_address(sockpath, bindto, port); + return qemu_nbd_client_list(opts.saddr, tlscreds, tlshostname ? tlshostname : bindto); } @@ -1024,8 +1025,8 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } #endif - saddr = nbd_build_socket_address(sockpath, bindto, port); - if (qio_net_listener_open_sync(server, saddr, backlog, + opts.saddr = nbd_build_socket_address(sockpath, bindto, port); + if (qio_net_listener_open_sync(server, opts.saddr, backlog, &local_err) < 0) { object_unref(OBJECT(server)); error_report_err(local_err); From 2eb7c2abe0b85a945549e91cb794d214c0905403 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 6 Sep 2023 11:32:07 +0200 Subject: [PATCH 11/13] qemu-nbd: invent nbd_client_release_pipe() helper Move the code from main() and nbd_client_thread() into the specific helper. This code is going to be grown. Signed-off-by: Denis V. Lunev CC: Eric Blake CC: Vladimir Sementsov-Ogievskiy Message-ID: <20230906093210.339585-6-den@openvz.org> Reviewed-by: Eric Blake Signed-off-by: Eric Blake --- qemu-nbd.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index 86bb2f04e2..7c4e22def1 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -259,6 +259,16 @@ struct NbdClientOpts { bool verbose; }; +static void nbd_client_release_pipe(void) +{ + /* Close stderr so that the qemu-nbd process exits. */ + if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) { + error_report("Could not release pipe to parent: %s", + strerror(errno)); + exit(EXIT_FAILURE); + } +} + #if HAVE_NBD_DEVICE static void *show_parts(void *arg) { @@ -322,12 +332,7 @@ static void *nbd_client_thread(void *arg) fprintf(stderr, "NBD device %s is now connected to %s\n", opts->device, opts->srcpath); } else { - /* Close stderr so that the qemu-nbd process exits. */ - if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) { - error_report("Could not release pipe to parent: %s", - strerror(errno)); - exit(EXIT_FAILURE); - } + nbd_client_release_pipe(); } if (nbd_client(fd) < 0) { @@ -1176,11 +1181,7 @@ int main(int argc, char **argv) } if (opts.fork_process) { - if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) { - error_report("Could not release pipe to parent: %s", - strerror(errno)); - exit(EXIT_FAILURE); - } + nbd_client_release_pipe(); } state = RUNNING; From f6f4620cbbe03fbdbddddd9c325ab3ea0dad33c6 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 6 Sep 2023 11:32:08 +0200 Subject: [PATCH 12/13] qemu-nbd: Restore "qemu-nbd -v --fork" output Closing stderr earlier is good for daemonized qemu-nbd under ssh earlier, but breaks the case where -v is being used to track what is happening in the server, as in iotest 233. When we know we are verbose, we should preserve original stderr and restore it once the setup stage is done. This commit restores the original behavior with -v option. In this case original output inside the test is kept intact. Reported-by: Kevin Wolf Signed-off-by: Denis V. Lunev CC: Eric Blake CC: Vladimir Sementsov-Ogievskiy CC: Hanna Reitz CC: Mike Maslenkin Fixes: 5c56dd27a2 ("qemu-nbd: fix regression with qemu-nbd --fork run over ssh") Message-ID: <20230906093210.339585-7-den@openvz.org> Reviewed-by: Eric Blake Tested-by: Eric Blake [eblake: fix build by avoiding stderr as struct member name] Signed-off-by: Eric Blake --- qemu-nbd.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index 7c4e22def1..30eeb6f3c7 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -255,18 +255,23 @@ struct NbdClientOpts { char *device; char *srcpath; SocketAddress *saddr; + int old_stderr; bool fork_process; bool verbose; }; -static void nbd_client_release_pipe(void) +static void nbd_client_release_pipe(int old_stderr) { /* Close stderr so that the qemu-nbd process exits. */ - if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) { + if (dup2(old_stderr, STDERR_FILENO) < 0) { error_report("Could not release pipe to parent: %s", strerror(errno)); exit(EXIT_FAILURE); } + if (old_stderr != STDOUT_FILENO && close(old_stderr) < 0) { + error_report("Could not release qemu-nbd: %s", strerror(errno)); + exit(EXIT_FAILURE); + } } #if HAVE_NBD_DEVICE @@ -332,7 +337,7 @@ static void *nbd_client_thread(void *arg) fprintf(stderr, "NBD device %s is now connected to %s\n", opts->device, opts->srcpath); } else { - nbd_client_release_pipe(); + nbd_client_release_pipe(opts->old_stderr); } if (nbd_client(fd) < 0) { @@ -597,6 +602,7 @@ int main(int argc, char **argv) .device = NULL, .srcpath = NULL, .saddr = NULL, + .old_stderr = STDOUT_FILENO, }; #ifdef CONFIG_POSIX @@ -951,6 +957,16 @@ int main(int argc, char **argv) close(stderr_fd[0]); + /* Remember parent's stderr if we will be restoring it. */ + if (opts.verbose /* fork_process is set */) { + opts.old_stderr = dup(STDERR_FILENO); + if (opts.old_stderr < 0) { + error_report("Could not dup original stderr: %s", + strerror(errno)); + exit(EXIT_FAILURE); + } + } + ret = qemu_daemon(1, 0); saved_errno = errno; /* dup2 will overwrite error below */ @@ -1181,7 +1197,7 @@ int main(int argc, char **argv) } if (opts.fork_process) { - nbd_client_release_pipe(); + nbd_client_release_pipe(opts.old_stderr); } state = RUNNING; From 35e087de085cd6cf7e4c64c9b59b62c37ddcd1bd Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 6 Sep 2023 11:32:09 +0200 Subject: [PATCH 13/13] qemu-nbd: document -v behavior in respect to --fork in man Signed-off-by: Denis V. Lunev CC: Eric Blake CC: Vladimir Sementsov-Ogievskiy Message-ID: <20230906093210.339585-8-den@openvz.org> Reviewed-by: Eric Blake [eblake: Wording improvement] Signed-off-by: Eric Blake --- docs/tools/qemu-nbd.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst index faf6349ea5..329f44d989 100644 --- a/docs/tools/qemu-nbd.rst +++ b/docs/tools/qemu-nbd.rst @@ -197,7 +197,9 @@ driver options if :option:`--image-opts` is specified. .. option:: -v, --verbose - Display extra debugging information. + Display extra debugging information. This option also keeps the original + *STDERR* stream open if the ``qemu-nbd`` process is daemonized due to + other options like :option:`--fork` or :option:`-c`. .. option:: -h, --help