diff --git a/.mailmap b/.mailmap index 64ef9f4de6..d214959288 100644 --- a/.mailmap +++ b/.mailmap @@ -40,12 +40,26 @@ Nick Hudson hnick@vmware.com # for the cvs2svn initialization commit e63c3dc74bf. # Next, translate a few commits where mailman rewrote the From: line due -# to strict SPF, although we prefer to avoid adding more entries like that. +# to strict SPF and DMARC. Usually, our build process should be flagging +# commits like these before maintainer merges; if you find the need to add +# a line here, please also report a bug against the part of the build +# process that let the mis-attribution slip through in the first place. +# +# If the mailing list munges your emails, use: +# git config sendemail.from '"Your Name" ' +# the use of "" in that line will differ from the typically unquoted +# 'git config user.name', which in turn is sufficient for 'git send-email' +# to add an extra From: line in the body of your email that takes +# precedence over any munged From: in the mail's headers. +# See https://lists.openembedded.org/g/openembedded-core/message/166515 +# and https://lists.gnu.org/archive/html/qemu-devel/2023-09/msg06784.html Ed Swierk Ed Swierk via Qemu-devel Ian McKellar Ian McKellar via Qemu-devel Julia Suvorova Julia Suvorova via Qemu-devel Justin Terry (VM) Justin Terry (VM) via Qemu-devel Stefan Weil Stefan Weil via +Andrey Drobyshev Andrey Drobyshev via +BALATON Zoltan BALATON Zoltan via # Next, replace old addresses by a more recent one. Aleksandar Markovic diff --git a/block/nbd.c b/block/nbd.c index 4a7f37da1c..52ebc8b2f5 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -416,7 +416,8 @@ static void coroutine_fn GRAPH_RDLOCK nbd_reconnect_attempt(BDRVNBDState *s) reconnect_delay_timer_del(s); } -static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie) +static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie, + Error **errp) { int ret; uint64_t ind = COOKIE_TO_INDEX(cookie), ind2; @@ -457,20 +458,25 @@ static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie) /* We are under mutex and cookie is 0. We have to do the dirty work. */ assert(s->reply.cookie == 0); - ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, NULL); - if (ret <= 0) { - ret = ret ? ret : -EIO; + ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, s->info.mode, errp); + if (ret == 0) { + ret = -EIO; + error_setg(errp, "server dropped connection"); + } + if (ret < 0) { nbd_channel_error(s, ret); return ret; } if (nbd_reply_is_structured(&s->reply) && s->info.mode < NBD_MODE_STRUCTURED) { nbd_channel_error(s, -EINVAL); + error_setg(errp, "unexpected structured reply"); return -EINVAL; } ind2 = COOKIE_TO_INDEX(s->reply.cookie); if (ind2 >= MAX_NBD_REQUESTS || !s->requests[ind2].coroutine) { nbd_channel_error(s, -EINVAL); + error_setg(errp, "unexpected cookie value"); return -EINVAL; } if (s->reply.cookie == cookie) { @@ -609,13 +615,17 @@ static int nbd_parse_offset_hole_payload(BDRVNBDState *s, */ static int nbd_parse_blockstatus_payload(BDRVNBDState *s, NBDStructuredReplyChunk *chunk, - uint8_t *payload, uint64_t orig_length, - NBDExtent32 *extent, Error **errp) + uint8_t *payload, bool wide, + uint64_t orig_length, + NBDExtent64 *extent, Error **errp) { uint32_t context_id; + uint32_t count; + size_t ext_len = wide ? sizeof(*extent) : sizeof(NBDExtent32); + size_t pay_len = sizeof(context_id) + wide * sizeof(count) + ext_len; /* The server succeeded, so it must have sent [at least] one extent */ - if (chunk->length < sizeof(context_id) + sizeof(*extent)) { + if (chunk->length < pay_len) { error_setg(errp, "Protocol error: invalid payload for " "NBD_REPLY_TYPE_BLOCK_STATUS"); return -EINVAL; @@ -630,8 +640,15 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s, return -EINVAL; } - extent->length = payload_advance32(&payload); - extent->flags = payload_advance32(&payload); + if (wide) { + count = payload_advance32(&payload); + extent->length = payload_advance64(&payload); + extent->flags = payload_advance64(&payload); + } else { + count = 0; + extent->length = payload_advance32(&payload); + extent->flags = payload_advance32(&payload); + } if (extent->length == 0) { error_setg(errp, "Protocol error: server sent status chunk with " @@ -652,7 +669,7 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s, * (always a safe status, even if it loses information). */ if (s->info.min_block && !QEMU_IS_ALIGNED(extent->length, - s->info.min_block)) { + s->info.min_block)) { trace_nbd_parse_blockstatus_compliance("extent length is unaligned"); if (extent->length > s->info.min_block) { extent->length = QEMU_ALIGN_DOWN(extent->length, @@ -666,13 +683,15 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s, /* * We used NBD_CMD_FLAG_REQ_ONE, so the server should not have * sent us any more than one extent, nor should it have included - * status beyond our request in that extent. However, it's easy - * enough to ignore the server's noncompliance without killing the + * status beyond our request in that extent. Furthermore, a wide + * server should have replied with an accurate count (we left + * count at 0 for a narrow server). However, it's easy enough to + * ignore the server's noncompliance without killing the * connection; just ignore trailing extents, and clamp things to * the length of our request. */ - if (chunk->length > sizeof(context_id) + sizeof(*extent)) { - trace_nbd_parse_blockstatus_compliance("more than one extent"); + if (count != wide || chunk->length > pay_len) { + trace_nbd_parse_blockstatus_compliance("unexpected extent count"); } if (extent->length > orig_length) { extent->length = orig_length; @@ -842,9 +861,9 @@ static coroutine_fn int nbd_co_do_receive_one_chunk( } *request_ret = 0; - ret = nbd_receive_replies(s, cookie); + ret = nbd_receive_replies(s, cookie, errp); if (ret < 0) { - error_setg(errp, "Connection closed"); + error_prepend(errp, "Connection closed: "); return -EIO; } assert(s->ioc); @@ -1118,7 +1137,7 @@ nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t cookie, static int coroutine_fn nbd_co_receive_blockstatus_reply(BDRVNBDState *s, uint64_t cookie, - uint64_t length, NBDExtent32 *extent, + uint64_t length, NBDExtent64 *extent, int *request_ret, Error **errp) { NBDReplyChunkIter iter; @@ -1131,11 +1150,17 @@ nbd_co_receive_blockstatus_reply(BDRVNBDState *s, uint64_t cookie, NBD_FOREACH_REPLY_CHUNK(s, iter, cookie, false, NULL, &reply, &payload) { int ret; NBDStructuredReplyChunk *chunk = &reply.structured; + bool wide; assert(nbd_reply_is_structured(&reply)); switch (chunk->type) { + case NBD_REPLY_TYPE_BLOCK_STATUS_EXT: case NBD_REPLY_TYPE_BLOCK_STATUS: + wide = chunk->type == NBD_REPLY_TYPE_BLOCK_STATUS_EXT; + if ((s->info.mode >= NBD_MODE_EXTENDED) != wide) { + trace_nbd_extended_headers_compliance("block_status"); + } if (received) { nbd_channel_error(s, -EINVAL); error_setg(&local_err, "Several BLOCK_STATUS chunks in reply"); @@ -1143,9 +1168,9 @@ nbd_co_receive_blockstatus_reply(BDRVNBDState *s, uint64_t cookie, } received = true; - ret = nbd_parse_blockstatus_payload(s, &reply.structured, - payload, length, extent, - &local_err); + ret = nbd_parse_blockstatus_payload( + s, &reply.structured, payload, wide, + length, extent, &local_err); if (ret < 0) { nbd_channel_error(s, ret); nbd_iter_channel_error(&iter, ret, &local_err); @@ -1375,7 +1400,7 @@ static int coroutine_fn GRAPH_RDLOCK nbd_client_co_block_status( int64_t *pnum, int64_t *map, BlockDriverState **file) { int ret, request_ret; - NBDExtent32 extent = { 0 }; + NBDExtent64 extent = { 0 }; BDRVNBDState *s = (BDRVNBDState *)bs->opaque; Error *local_err = NULL; diff --git a/block/trace-events b/block/trace-events index 925aa554bb..8e789e1f12 100644 --- a/block/trace-events +++ b/block/trace-events @@ -166,6 +166,7 @@ iscsi_xcopy(void *src_lun, uint64_t src_off, void *dst_lun, uint64_t dst_off, ui # nbd.c nbd_parse_blockstatus_compliance(const char *err) "ignoring extra data from non-compliant server: %s" nbd_structured_read_compliance(const char *type) "server sent non-compliant unaligned read %s chunk" +nbd_extended_headers_compliance(const char *type) "server sent non-compliant %s chunk not matching choice of extended headers" nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s" nbd_co_request_fail(uint64_t from, uint64_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu64 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s" nbd_client_handshake(const char *export_name) "export '%s'" diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt index f5ca25174a..18efb251de 100644 --- a/docs/interop/nbd.txt +++ b/docs/interop/nbd.txt @@ -69,3 +69,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE NBD_CMD_FLAG_FAST_ZERO * 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth" * 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports +* 8.2: NBD_OPT_EXTENDED_HEADERS, NBD_FLAG_BLOCK_STATUS_PAYLOAD diff --git a/include/block/nbd.h b/include/block/nbd.h index 8a765e78df..4e7bd6342f 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -29,6 +29,7 @@ typedef struct NBDExport NBDExport; typedef struct NBDClient NBDClient; typedef struct NBDClientConnection NBDClientConnection; +typedef struct NBDMetaContexts NBDMetaContexts; extern const BlockExportDriver blk_exp_nbd; @@ -76,6 +77,7 @@ typedef struct NBDRequest { uint16_t flags; /* NBD_CMD_FLAG_* */ uint16_t type; /* NBD_CMD_* */ NBDMode mode; /* Determines which network representation to use */ + NBDMetaContexts *contexts; /* Used by NBD_CMD_BLOCK_STATUS */ } NBDRequest; typedef struct NBDSimpleReply { @@ -389,7 +391,8 @@ int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info, Error **errp); int nbd_send_request(QIOChannel *ioc, NBDRequest *request); int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc, - NBDReply *reply, Error **errp); + NBDReply *reply, NBDMode mode, + Error **errp); int nbd_client(int fd); int nbd_disconnect(int fd); int nbd_errno_to_system_errno(int err); diff --git a/nbd/client-connection.c b/nbd/client-connection.c index aa0201b710..f9da67c87e 100644 --- a/nbd/client-connection.c +++ b/nbd/client-connection.c @@ -93,7 +93,7 @@ NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr, .do_negotiation = do_negotiation, .initial_info.request_sizes = true, - .initial_info.mode = NBD_MODE_STRUCTURED, + .initial_info.mode = NBD_MODE_EXTENDED, .initial_info.base_allocation = true, .initial_info.x_dirty_bitmap = g_strdup(x_dirty_bitmap), .initial_info.name = g_strdup(export_name ?: "") diff --git a/nbd/client.c b/nbd/client.c index cecb0f0437..29ffc609a4 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -953,15 +953,23 @@ static int nbd_start_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, if (fixedNewStyle) { int result = 0; + if (max_mode >= NBD_MODE_EXTENDED) { + result = nbd_request_simple_option(ioc, + NBD_OPT_EXTENDED_HEADERS, + false, errp); + if (result) { + return result < 0 ? -EINVAL : NBD_MODE_EXTENDED; + } + } if (max_mode >= NBD_MODE_STRUCTURED) { result = nbd_request_simple_option(ioc, NBD_OPT_STRUCTURED_REPLY, false, errp); - if (result < 0) { - return -EINVAL; + if (result) { + return result < 0 ? -EINVAL : NBD_MODE_STRUCTURED; } } - return result ? NBD_MODE_STRUCTURED : NBD_MODE_SIMPLE; + return NBD_MODE_SIMPLE; } else { return NBD_MODE_EXPORT_NAME; } @@ -1034,6 +1042,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, } switch (info->mode) { + case NBD_MODE_EXTENDED: case NBD_MODE_STRUCTURED: if (base_allocation) { result = nbd_negotiate_simple_meta_context(ioc, info, errp); @@ -1144,7 +1153,7 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, *info = NULL; result = nbd_start_negotiate(ioc, tlscreds, hostname, &sioc, - NBD_MODE_STRUCTURED, NULL, errp); + NBD_MODE_EXTENDED, NULL, errp); if (tlscreds && sioc) { ioc = sioc; } @@ -1155,6 +1164,7 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, switch ((NBDMode)result) { case NBD_MODE_SIMPLE: case NBD_MODE_STRUCTURED: + case NBD_MODE_EXTENDED: /* newstyle - use NBD_OPT_LIST to populate array, then try * NBD_OPT_INFO on each array member. If structured replies * are enabled, also try NBD_OPT_LIST_META_CONTEXT. */ @@ -1191,7 +1201,7 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, break; } - if (result == NBD_MODE_STRUCTURED && + if (result >= NBD_MODE_STRUCTURED && nbd_list_meta_contexts(ioc, &array[i], errp) < 0) { goto out; } @@ -1346,22 +1356,29 @@ int nbd_disconnect(int fd) int nbd_send_request(QIOChannel *ioc, NBDRequest *request) { - uint8_t buf[NBD_REQUEST_SIZE]; + uint8_t buf[NBD_EXTENDED_REQUEST_SIZE]; + size_t len; - assert(request->mode <= NBD_MODE_STRUCTURED); /* TODO handle extended */ - assert(request->len <= UINT32_MAX); trace_nbd_send_request(request->from, request->len, request->cookie, request->flags, request->type, nbd_cmd_lookup(request->type)); - stl_be_p(buf, NBD_REQUEST_MAGIC); stw_be_p(buf + 4, request->flags); stw_be_p(buf + 6, request->type); stq_be_p(buf + 8, request->cookie); stq_be_p(buf + 16, request->from); - stl_be_p(buf + 24, request->len); + if (request->mode >= NBD_MODE_EXTENDED) { + stl_be_p(buf, NBD_EXTENDED_REQUEST_MAGIC); + stq_be_p(buf + 24, request->len); + len = NBD_EXTENDED_REQUEST_SIZE; + } else { + assert(request->len <= UINT32_MAX); + stl_be_p(buf, NBD_REQUEST_MAGIC); + stl_be_p(buf + 24, request->len); + len = NBD_REQUEST_SIZE; + } - return nbd_write(ioc, buf, sizeof(buf), NULL); + return nbd_write(ioc, buf, len, NULL); } /* nbd_receive_simple_reply @@ -1388,30 +1405,36 @@ static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply, return 0; } -/* nbd_receive_structured_reply_chunk +/* nbd_receive_reply_chunk_header * Read structured reply chunk except magic field (which should be already - * read). + * read). Normalize into the compact form. * Payload is not read. */ -static int nbd_receive_structured_reply_chunk(QIOChannel *ioc, - NBDStructuredReplyChunk *chunk, - Error **errp) +static int nbd_receive_reply_chunk_header(QIOChannel *ioc, NBDReply *chunk, + Error **errp) { int ret; + size_t len; + uint64_t payload_len; - assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC); + if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) { + len = sizeof(chunk->structured); + } else { + assert(chunk->magic == NBD_EXTENDED_REPLY_MAGIC); + len = sizeof(chunk->extended); + } ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic), - sizeof(*chunk) - sizeof(chunk->magic), "structured chunk", + len - sizeof(chunk->magic), "structured chunk", errp); if (ret < 0) { return ret; } - chunk->flags = be16_to_cpu(chunk->flags); - chunk->type = be16_to_cpu(chunk->type); - chunk->cookie = be64_to_cpu(chunk->cookie); - chunk->length = be32_to_cpu(chunk->length); + /* flags, type, and cookie occupy same space between forms */ + chunk->structured.flags = be16_to_cpu(chunk->structured.flags); + chunk->structured.type = be16_to_cpu(chunk->structured.type); + chunk->structured.cookie = be64_to_cpu(chunk->structured.cookie); /* * Because we use BLOCK_STATUS with REQ_ONE, and cap READ requests @@ -1419,11 +1442,20 @@ static int nbd_receive_structured_reply_chunk(QIOChannel *ioc, * this. Even if we stopped using REQ_ONE, sane servers will cap * the number of extents they return for block status. */ - if (chunk->length > NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData)) { + if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) { + payload_len = be32_to_cpu(chunk->structured.length); + } else { + /* For now, we are ignoring the extended header offset. */ + payload_len = be64_to_cpu(chunk->extended.length); + chunk->magic = NBD_STRUCTURED_REPLY_MAGIC; + } + if (payload_len > NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData)) { error_setg(errp, "server chunk %" PRIu32 " (%s) payload is too long", - chunk->type, nbd_rep_lookup(chunk->type)); + chunk->structured.type, + nbd_rep_lookup(chunk->structured.type)); return -EINVAL; } + chunk->structured.length = payload_len; return 0; } @@ -1470,19 +1502,21 @@ nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size, /* nbd_receive_reply * - * Decreases bs->in_flight while waiting for a new reply. This yield is where - * we wait indefinitely and the coroutine must be able to be safely reentered - * for nbd_client_attach_aio_context(). + * Wait for a new reply. If this yields, the coroutine must be able to be + * safely reentered for nbd_client_attach_aio_context(). @mode determines + * which reply magic we are expecting, although this normalizes the result + * so that the caller only has to work with compact headers. * * Returns 1 on success - * 0 on eof, when no data was read (errp is not set) - * negative errno on failure (errp is set) + * 0 on eof, when no data was read + * negative errno on failure */ int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc, - NBDReply *reply, Error **errp) + NBDReply *reply, NBDMode mode, Error **errp) { int ret; const char *type; + uint32_t expected; ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp); if (ret <= 0) { @@ -1491,34 +1525,44 @@ int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc, reply->magic = be32_to_cpu(reply->magic); + /* Diagnose but accept wrong-width header */ switch (reply->magic) { case NBD_SIMPLE_REPLY_MAGIC: + if (mode >= NBD_MODE_EXTENDED) { + trace_nbd_receive_wrong_header(reply->magic, + nbd_mode_lookup(mode)); + } ret = nbd_receive_simple_reply(ioc, &reply->simple, errp); if (ret < 0) { - break; + return ret; } trace_nbd_receive_simple_reply(reply->simple.error, nbd_err_lookup(reply->simple.error), reply->cookie); break; case NBD_STRUCTURED_REPLY_MAGIC: - ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp); + case NBD_EXTENDED_REPLY_MAGIC: + expected = mode >= NBD_MODE_EXTENDED ? NBD_EXTENDED_REPLY_MAGIC + : NBD_STRUCTURED_REPLY_MAGIC; + if (reply->magic != expected) { + trace_nbd_receive_wrong_header(reply->magic, + nbd_mode_lookup(mode)); + } + ret = nbd_receive_reply_chunk_header(ioc, reply, errp); if (ret < 0) { - break; + return ret; } type = nbd_reply_type_lookup(reply->structured.type); - trace_nbd_receive_structured_reply_chunk(reply->structured.flags, - reply->structured.type, type, - reply->structured.cookie, - reply->structured.length); + trace_nbd_receive_reply_chunk_header(reply->structured.flags, + reply->structured.type, type, + reply->structured.cookie, + reply->structured.length); break; default: + trace_nbd_receive_wrong_header(reply->magic, nbd_mode_lookup(mode)); error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic); return -EINVAL; } - if (ret < 0) { - return ret; - } return 1; } diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index 133b1d94b5..dfa02f77ee 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -34,8 +34,11 @@ * https://github.com/yoe/nbd/blob/master/doc/proto.md */ -/* Size of all NBD_OPT_*, without payload */ +/* Size of all compact NBD_CMD_*, without payload */ #define NBD_REQUEST_SIZE (4 + 2 + 2 + 8 + 8 + 4) +/* Size of all extended NBD_CMD_*, without payload */ +#define NBD_EXTENDED_REQUEST_SIZE (4 + 2 + 2 + 8 + 8 + 8) + /* Size of all NBD_REP_* sent in answer to most NBD_OPT_*, without payload */ #define NBD_REPLY_SIZE (4 + 4 + 8) /* Size of reply to NBD_OPT_EXPORT_NAME */ diff --git a/nbd/server.c b/nbd/server.c index 7a6f95071f..859c163d19 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -105,11 +105,13 @@ struct NBDExport { static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); -/* NBDExportMetaContexts represents a list of contexts to be exported, +/* + * NBDMetaContexts represents a list of meta contexts in use, * as selected by NBD_OPT_SET_META_CONTEXT. Also used for - * NBD_OPT_LIST_META_CONTEXT. */ -typedef struct NBDExportMetaContexts { - NBDExport *exp; + * NBD_OPT_LIST_META_CONTEXT. + */ +struct NBDMetaContexts { + const NBDExport *exp; /* associated export */ size_t count; /* number of negotiated contexts */ bool base_allocation; /* export base:allocation context (block status) */ bool allocation_depth; /* export qemu:allocation-depth */ @@ -117,7 +119,7 @@ typedef struct NBDExportMetaContexts { * export qemu:dirty-bitmap:, * sized by exp->nr_export_bitmaps */ -} NBDExportMetaContexts; +}; struct NBDClient { int refcount; @@ -144,7 +146,7 @@ struct NBDClient { uint32_t check_align; /* If non-zero, check for aligned client requests */ NBDMode mode; - NBDExportMetaContexts export_meta; + NBDMetaContexts contexts; /* Negotiated meta contexts */ uint32_t opt; /* Current option being negotiated */ uint32_t optlen; /* remaining length of data in ioc for the option being @@ -455,10 +457,10 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); } -static void nbd_check_meta_export(NBDClient *client) +static void nbd_check_meta_export(NBDClient *client, NBDExport *exp) { - if (client->exp != client->export_meta.exp) { - client->export_meta.count = 0; + if (exp != client->contexts.exp) { + client->contexts.count = 0; } } @@ -482,6 +484,10 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, [10 .. 133] reserved (0) [unless no_zeroes] */ trace_nbd_negotiate_handle_export_name(); + if (client->mode >= NBD_MODE_EXTENDED) { + error_setg(errp, "Extended headers already negotiated"); + return -EINVAL; + } if (client->optlen > NBD_MAX_STRING_SIZE) { error_setg(errp, "Bad length received"); return -EINVAL; @@ -500,11 +506,15 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, error_setg(errp, "export not found"); return -EINVAL; } + nbd_check_meta_export(client, client->exp); myflags = client->exp->nbdflags; if (client->mode >= NBD_MODE_STRUCTURED) { myflags |= NBD_FLAG_SEND_DF; } + if (client->mode >= NBD_MODE_EXTENDED && client->contexts.count) { + myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD; + } trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags); stq_be_p(buf, client->exp->size); stw_be_p(buf + 8, myflags); @@ -517,7 +527,6 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); blk_exp_ref(&client->exp->common); - nbd_check_meta_export(client); return 0; } @@ -637,6 +646,9 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) errp, "export '%s' not present", sane_name); } + if (client->opt == NBD_OPT_GO) { + nbd_check_meta_export(client, exp); + } /* Don't bother sending NBD_INFO_NAME unless client requested it */ if (sendname) { @@ -690,6 +702,10 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) if (client->mode >= NBD_MODE_STRUCTURED) { myflags |= NBD_FLAG_SEND_DF; } + if (client->mode >= NBD_MODE_EXTENDED && + (client->contexts.count || client->opt == NBD_OPT_INFO)) { + myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD; + } trace_nbd_negotiate_new_style_size_flags(exp->size, myflags); stq_be_p(buf, exp->size); stw_be_p(buf + 8, myflags); @@ -725,7 +741,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) client->check_align = check_align; QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); blk_exp_ref(&client->exp->common); - nbd_check_meta_export(client); rc = 1; } return rc; @@ -848,7 +863,7 @@ static bool nbd_strshift(const char **str, const char *prefix) * Handle queries to 'base' namespace. For now, only the base:allocation * context is available. Return true if @query has been handled. */ -static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta, +static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta, const char *query) { if (!nbd_strshift(&query, "base:")) { @@ -868,7 +883,7 @@ static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta, * and qemu:allocation-depth contexts are available. Return true if @query * has been handled. */ -static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta, +static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta, const char *query) { size_t i; @@ -934,7 +949,7 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta, * Return -errno on I/O error, 0 if option was completely handled by * sending a reply about inconsistent lengths, or 1 on success. */ static int nbd_negotiate_meta_query(NBDClient *client, - NBDExportMetaContexts *meta, Error **errp) + NBDMetaContexts *meta, Error **errp) { int ret; g_autofree char *query = NULL; @@ -973,14 +988,14 @@ static int nbd_negotiate_meta_query(NBDClient *client, * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT * * Return -errno on I/O error, or 0 if option was completely handled. */ -static int nbd_negotiate_meta_queries(NBDClient *client, - NBDExportMetaContexts *meta, Error **errp) +static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp) { int ret; g_autofree char *export_name = NULL; /* Mark unused to work around https://bugs.llvm.org/show_bug.cgi?id=3888 */ g_autofree G_GNUC_UNUSED bool *bitmaps = NULL; - NBDExportMetaContexts local_meta = {0}; + NBDMetaContexts local_meta = {0}; + NBDMetaContexts *meta; uint32_t nb_queries; size_t i; size_t count = 0; @@ -996,6 +1011,8 @@ static int nbd_negotiate_meta_queries(NBDClient *client, if (client->opt == NBD_OPT_LIST_META_CONTEXT) { /* Only change the caller's meta on SET. */ meta = &local_meta; + } else { + meta = &client->contexts; } g_free(meta->bitmaps); @@ -1264,6 +1281,10 @@ static int nbd_negotiate_options(NBDClient *client, Error **errp) case NBD_OPT_STRUCTURED_REPLY: if (length) { ret = nbd_reject_length(client, false, errp); + } else if (client->mode >= NBD_MODE_EXTENDED) { + ret = nbd_negotiate_send_rep_err( + client, NBD_REP_ERR_EXT_HEADER_REQD, errp, + "extended headers already negotiated"); } else if (client->mode >= NBD_MODE_STRUCTURED) { ret = nbd_negotiate_send_rep_err( client, NBD_REP_ERR_INVALID, errp, @@ -1276,8 +1297,20 @@ static int nbd_negotiate_options(NBDClient *client, Error **errp) case NBD_OPT_LIST_META_CONTEXT: case NBD_OPT_SET_META_CONTEXT: - ret = nbd_negotiate_meta_queries(client, &client->export_meta, - errp); + ret = nbd_negotiate_meta_queries(client, errp); + break; + + case NBD_OPT_EXTENDED_HEADERS: + if (length) { + ret = nbd_reject_length(client, false, errp); + } else if (client->mode >= NBD_MODE_EXTENDED) { + ret = nbd_negotiate_send_rep_err( + client, NBD_REP_ERR_INVALID, errp, + "extended headers already negotiated"); + } else { + ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); + client->mode = NBD_MODE_EXTENDED; + } break; default: @@ -1411,11 +1444,13 @@ nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp) static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request, Error **errp) { - uint8_t buf[NBD_REQUEST_SIZE]; - uint32_t magic; + uint8_t buf[NBD_EXTENDED_REQUEST_SIZE]; + uint32_t magic, expect; int ret; + size_t size = client->mode >= NBD_MODE_EXTENDED ? + NBD_EXTENDED_REQUEST_SIZE : NBD_REQUEST_SIZE; - ret = nbd_read_eof(client, buf, sizeof(buf), errp); + ret = nbd_read_eof(client, buf, size, errp); if (ret < 0) { return ret; } @@ -1423,13 +1458,21 @@ static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque return -EIO; } - /* Request - [ 0 .. 3] magic (NBD_REQUEST_MAGIC) - [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) - [ 6 .. 7] type (NBD_CMD_READ, ...) - [ 8 .. 15] cookie - [16 .. 23] from - [24 .. 27] len + /* + * Compact request + * [ 0 .. 3] magic (NBD_REQUEST_MAGIC) + * [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) + * [ 6 .. 7] type (NBD_CMD_READ, ...) + * [ 8 .. 15] cookie + * [16 .. 23] from + * [24 .. 27] len + * Extended request + * [ 0 .. 3] magic (NBD_EXTENDED_REQUEST_MAGIC) + * [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, NBD_CMD_FLAG_PAYLOAD_LEN, ...) + * [ 6 .. 7] type (NBD_CMD_READ, ...) + * [ 8 .. 15] cookie + * [16 .. 23] from + * [24 .. 31] len */ magic = ldl_be_p(buf); @@ -1437,13 +1480,20 @@ static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque request->type = lduw_be_p(buf + 6); request->cookie = ldq_be_p(buf + 8); request->from = ldq_be_p(buf + 16); - request->len = (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */ + if (client->mode >= NBD_MODE_EXTENDED) { + request->len = ldq_be_p(buf + 24); + expect = NBD_EXTENDED_REQUEST_MAGIC; + } else { + request->len = (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */ + expect = NBD_REQUEST_MAGIC; + } trace_nbd_receive_request(magic, request->flags, request->type, request->from, request->len); - if (magic != NBD_REQUEST_MAGIC) { - error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic); + if (magic != expect) { + error_setg(errp, "invalid magic (got 0x%" PRIx32 ", expected 0x%" + PRIx32 ")", magic, expect); return -EINVAL; } return 0; @@ -1474,7 +1524,7 @@ void nbd_client_put(NBDClient *client) QTAILQ_REMOVE(&client->exp->clients, client, next); blk_exp_unref(&client->exp->common); } - g_free(client->export_meta.bitmaps); + g_free(client->contexts.bitmaps); g_free(client); } } @@ -1921,8 +1971,6 @@ static inline void set_be_chunk(NBDClient *client, struct iovec *iov, size_t niov, uint16_t flags, uint16_t type, NBDRequest *request) { - /* TODO - handle structured vs. extended replies */ - NBDStructuredReplyChunk *chunk = iov->iov_base; size_t i, length = 0; for (i = 1; i < niov; i++) { @@ -1930,12 +1978,26 @@ static inline void set_be_chunk(NBDClient *client, struct iovec *iov, } assert(length <= NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData)); - iov[0].iov_len = sizeof(*chunk); - stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC); - stw_be_p(&chunk->flags, flags); - stw_be_p(&chunk->type, type); - stq_be_p(&chunk->cookie, request->cookie); - stl_be_p(&chunk->length, length); + if (client->mode >= NBD_MODE_EXTENDED) { + NBDExtendedReplyChunk *chunk = iov->iov_base; + + iov[0].iov_len = sizeof(*chunk); + stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC); + stw_be_p(&chunk->flags, flags); + stw_be_p(&chunk->type, type); + stq_be_p(&chunk->cookie, request->cookie); + stq_be_p(&chunk->offset, request->from); + stq_be_p(&chunk->length, length); + } else { + NBDStructuredReplyChunk *chunk = iov->iov_base; + + iov[0].iov_len = sizeof(*chunk); + stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC); + stw_be_p(&chunk->flags, flags); + stw_be_p(&chunk->type, type); + stq_be_p(&chunk->cookie, request->cookie); + stl_be_p(&chunk->length, length); + } } static int coroutine_fn nbd_co_send_chunk_done(NBDClient *client, @@ -2074,20 +2136,24 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, } typedef struct NBDExtentArray { - NBDExtent32 *extents; + NBDExtent64 *extents; unsigned int nb_alloc; unsigned int count; uint64_t total_length; + bool extended; bool can_add; bool converted_to_be; } NBDExtentArray; -static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc) +static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc, + NBDMode mode) { NBDExtentArray *ea = g_new0(NBDExtentArray, 1); + assert(mode >= NBD_MODE_STRUCTURED); ea->nb_alloc = nb_alloc; - ea->extents = g_new(NBDExtent32, nb_alloc); + ea->extents = g_new(NBDExtent64, nb_alloc); + ea->extended = mode >= NBD_MODE_EXTENDED; ea->can_add = true; return ea; @@ -2106,15 +2172,36 @@ static void nbd_extent_array_convert_to_be(NBDExtentArray *ea) int i; assert(!ea->converted_to_be); + assert(ea->extended); ea->can_add = false; ea->converted_to_be = true; for (i = 0; i < ea->count; i++) { - ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags); - ea->extents[i].length = cpu_to_be32(ea->extents[i].length); + ea->extents[i].length = cpu_to_be64(ea->extents[i].length); + ea->extents[i].flags = cpu_to_be64(ea->extents[i].flags); } } +/* Further modifications of the array after conversion are abandoned */ +static NBDExtent32 *nbd_extent_array_convert_to_narrow(NBDExtentArray *ea) +{ + int i; + NBDExtent32 *extents = g_new(NBDExtent32, ea->count); + + assert(!ea->converted_to_be); + assert(!ea->extended); + ea->can_add = false; + ea->converted_to_be = true; + + for (i = 0; i < ea->count; i++) { + assert((ea->extents[i].length | ea->extents[i].flags) <= UINT32_MAX); + extents[i].length = cpu_to_be32(ea->extents[i].length); + extents[i].flags = cpu_to_be32(ea->extents[i].flags); + } + + return extents; +} + /* * Add extent to NBDExtentArray. If extent can't be added (no available space), * return -1. @@ -2125,19 +2212,27 @@ static void nbd_extent_array_convert_to_be(NBDExtentArray *ea) * would result in an incorrect range reported to the client) */ static int nbd_extent_array_add(NBDExtentArray *ea, - uint32_t length, uint32_t flags) + uint64_t length, uint32_t flags) { assert(ea->can_add); if (!length) { return 0; } + if (!ea->extended) { + assert(length <= UINT32_MAX); + } /* Extend previous extent if flags are the same */ if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) { - uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length; + uint64_t sum = length + ea->extents[ea->count - 1].length; - if (sum <= UINT32_MAX) { + /* + * sum cannot overflow: the block layer bounds image size at + * 2^63, and ea->extents[].length comes from the block layer. + */ + assert(sum >= length); + if (sum <= UINT32_MAX || ea->extended) { ea->extents[ea->count - 1].length = sum; ea->total_length += length; return 0; @@ -2150,7 +2245,7 @@ static int nbd_extent_array_add(NBDExtentArray *ea, } ea->total_length += length; - ea->extents[ea->count] = (NBDExtent32) {.length = length, .flags = flags}; + ea->extents[ea->count] = (NBDExtent64) {.length = length, .flags = flags}; ea->count++; return 0; @@ -2219,20 +2314,39 @@ nbd_co_send_extents(NBDClient *client, NBDRequest *request, NBDExtentArray *ea, bool last, uint32_t context_id, Error **errp) { NBDReply hdr; - NBDStructuredMeta chunk; - struct iovec iov[] = { - {.iov_base = &hdr}, - {.iov_base = &chunk, .iov_len = sizeof(chunk)}, - {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])} - }; + NBDStructuredMeta meta; + NBDExtendedMeta meta_ext; + g_autofree NBDExtent32 *extents = NULL; + uint16_t type; + struct iovec iov[] = { {.iov_base = &hdr}, {0}, {0} }; - nbd_extent_array_convert_to_be(ea); + if (client->mode >= NBD_MODE_EXTENDED) { + type = NBD_REPLY_TYPE_BLOCK_STATUS_EXT; + + iov[1].iov_base = &meta_ext; + iov[1].iov_len = sizeof(meta_ext); + stl_be_p(&meta_ext.context_id, context_id); + stl_be_p(&meta_ext.count, ea->count); + + nbd_extent_array_convert_to_be(ea); + iov[2].iov_base = ea->extents; + iov[2].iov_len = ea->count * sizeof(ea->extents[0]); + } else { + type = NBD_REPLY_TYPE_BLOCK_STATUS; + + iov[1].iov_base = &meta; + iov[1].iov_len = sizeof(meta); + stl_be_p(&meta.context_id, context_id); + + extents = nbd_extent_array_convert_to_narrow(ea); + iov[2].iov_base = extents; + iov[2].iov_len = ea->count * sizeof(extents[0]); + } trace_nbd_co_send_extents(request->cookie, ea->count, context_id, ea->total_length, last); - set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0, - NBD_REPLY_TYPE_BLOCK_STATUS, request); - stl_be_p(&chunk.context_id, context_id); + set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0, type, + request); return nbd_co_send_iov(client, iov, 3, errp); } @@ -2241,13 +2355,14 @@ nbd_co_send_extents(NBDClient *client, NBDRequest *request, NBDExtentArray *ea, static int coroutine_fn nbd_co_send_block_status(NBDClient *client, NBDRequest *request, BlockBackend *blk, uint64_t offset, - uint32_t length, bool dont_fragment, + uint64_t length, bool dont_fragment, bool last, uint32_t context_id, Error **errp) { int ret; unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS; - g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents); + g_autoptr(NBDExtentArray) ea = + nbd_extent_array_new(nb_extents, client->mode); if (context_id == NBD_META_ID_BASE_ALLOCATION) { ret = blockstatus_to_extents(blk, offset, length, ea); @@ -2270,11 +2385,12 @@ static void bitmap_to_extents(BdrvDirtyBitmap *bitmap, int64_t start, dirty_start, dirty_count; int64_t end = offset + length; bool full = false; + int64_t bound = es->extended ? INT64_MAX : INT32_MAX; bdrv_dirty_bitmap_lock(bitmap); for (start = offset; - bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX, + bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, bound, &dirty_start, &dirty_count); start = dirty_start + dirty_count) { @@ -2298,18 +2414,103 @@ static int coroutine_fn nbd_co_send_bitmap(NBDClient *client, NBDRequest *request, BdrvDirtyBitmap *bitmap, uint64_t offset, - uint32_t length, bool dont_fragment, + uint64_t length, bool dont_fragment, bool last, uint32_t context_id, Error **errp) { unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS; - g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents); + g_autoptr(NBDExtentArray) ea = + nbd_extent_array_new(nb_extents, client->mode); bitmap_to_extents(bitmap, offset, length, ea); return nbd_co_send_extents(client, request, ea, last, context_id, errp); } +/* + * nbd_co_block_status_payload_read + * Called when a client wants a subset of negotiated contexts via a + * BLOCK_STATUS payload. Check the payload for valid length and + * contents. On success, return 0 with request updated to effective + * length. If request was invalid but all payload consumed, return 0 + * with request->len and request->contexts->count set to 0 (which will + * trigger an appropriate NBD_EINVAL response later on). Return + * negative errno if the payload was not fully consumed. + */ +static int +nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request, + Error **errp) +{ + uint64_t payload_len = request->len; + g_autofree char *buf = NULL; + size_t count, i, nr_bitmaps; + uint32_t id; + + if (payload_len > NBD_MAX_BUFFER_SIZE) { + error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)", + request->len, NBD_MAX_BUFFER_SIZE); + return -EINVAL; + } + + assert(client->contexts.exp == client->exp); + nr_bitmaps = client->exp->nr_export_bitmaps; + request->contexts = g_new0(NBDMetaContexts, 1); + request->contexts->exp = client->exp; + + if (payload_len % sizeof(uint32_t) || + payload_len < sizeof(NBDBlockStatusPayload) || + payload_len > (sizeof(NBDBlockStatusPayload) + + sizeof(id) * client->contexts.count)) { + goto skip; + } + + buf = g_malloc(payload_len); + if (nbd_read(client->ioc, buf, payload_len, + "CMD_BLOCK_STATUS data", errp) < 0) { + return -EIO; + } + trace_nbd_co_receive_request_payload_received(request->cookie, + payload_len); + request->contexts->bitmaps = g_new0(bool, nr_bitmaps); + count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id); + payload_len = 0; + + for (i = 0; i < count; i++) { + id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i); + if (id == NBD_META_ID_BASE_ALLOCATION) { + if (!client->contexts.base_allocation || + request->contexts->base_allocation) { + goto skip; + } + request->contexts->base_allocation = true; + } else if (id == NBD_META_ID_ALLOCATION_DEPTH) { + if (!client->contexts.allocation_depth || + request->contexts->allocation_depth) { + goto skip; + } + request->contexts->allocation_depth = true; + } else { + unsigned idx = id - NBD_META_ID_DIRTY_BITMAP; + + if (idx >= nr_bitmaps || !client->contexts.bitmaps[idx] || + request->contexts->bitmaps[idx]) { + goto skip; + } + request->contexts->bitmaps[idx] = true; + } + } + + request->len = ldq_be_p(buf); + request->contexts->count = count; + return 0; + + skip: + trace_nbd_co_receive_block_status_payload_compliance(request->from, + request->len); + request->len = request->contexts->count = 0; + return nbd_drop(client->ioc, payload_len, errp); +} + /* nbd_co_receive_request * Collect a client request. Return 0 if request looks valid, -EIO to drop * connection right away, -EAGAIN to indicate we were interrupted and the @@ -2322,10 +2523,12 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, Error **errp) { NBDClient *client = req->client; + bool extended_with_payload; bool check_length = false; bool check_rofs = false; bool allocate_buffer = false; - unsigned payload_len = 0; + bool payload_okay = false; + uint64_t payload_len = 0; int valid_flags = NBD_CMD_FLAG_FUA; int ret; @@ -2338,6 +2541,13 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, trace_nbd_co_receive_request_decode_type(request->cookie, request->type, nbd_cmd_lookup(request->type)); + extended_with_payload = client->mode >= NBD_MODE_EXTENDED && + request->flags & NBD_CMD_FLAG_PAYLOAD_LEN; + if (extended_with_payload) { + payload_len = request->len; + check_length = true; + } + switch (request->type) { case NBD_CMD_DISC: /* Special case: we're going to disconnect without a reply, @@ -2354,6 +2564,15 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, break; case NBD_CMD_WRITE: + if (client->mode >= NBD_MODE_EXTENDED) { + if (!extended_with_payload) { + /* The client is noncompliant. Trace it, but proceed. */ + trace_nbd_co_receive_ext_payload_compliance(request->from, + request->len); + } + valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN; + } + payload_okay = true; payload_len = request->len; check_length = true; allocate_buffer = true; @@ -2377,6 +2596,18 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, break; case NBD_CMD_BLOCK_STATUS: + if (extended_with_payload) { + ret = nbd_co_block_status_payload_read(client, request, errp); + if (ret < 0) { + return ret; + } + /* payload now consumed */ + check_length = false; + payload_len = 0; + valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN; + } else { + request->contexts = &client->contexts; + } valid_flags |= NBD_CMD_FLAG_REQ_ONE; break; @@ -2395,6 +2626,16 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, request->len, NBD_MAX_BUFFER_SIZE); return -EINVAL; } + if (payload_len && !payload_okay) { + /* + * For now, we don't support payloads on other commands; but + * we can keep the connection alive by ignoring the payload. + * We will fail the command later with NBD_EINVAL for the use + * of an unsupported flag (and not for access beyond bounds). + */ + assert(request->type != NBD_CMD_WRITE); + request->len = 0; + } if (allocate_buffer) { /* READ, WRITE */ req->data = blk_try_blockalign(client->exp->common.blk, @@ -2405,10 +2646,14 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, } } if (payload_len) { - /* WRITE */ - assert(req->data); - ret = nbd_read(client->ioc, req->data, payload_len, - "CMD_WRITE data", errp); + if (payload_okay) { + /* WRITE */ + assert(req->data); + ret = nbd_read(client->ioc, req->data, payload_len, + "CMD_WRITE data", errp); + } else { + ret = nbd_drop(client->ioc, payload_len, errp); + } if (ret < 0) { return -EIO; } @@ -2463,6 +2708,8 @@ static coroutine_fn int nbd_send_generic_reply(NBDClient *client, { if (client->mode >= NBD_MODE_STRUCTURED && ret < 0) { return nbd_co_send_chunk_error(client, request, -ret, error_msg, errp); + } else if (client->mode >= NBD_MODE_EXTENDED) { + return nbd_co_send_chunk_done(client, request, errp); } else { return nbd_co_send_simple_reply(client, request, ret < 0 ? -ret : 0, NULL, 0, errp); @@ -2604,16 +2851,18 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, "discard failed", errp); case NBD_CMD_BLOCK_STATUS: - if (!request->len) { - return nbd_send_generic_reply(client, request, -EINVAL, - "need non-zero length", errp); - } - assert(request->len <= UINT32_MAX); - if (client->export_meta.count) { + assert(request->contexts); + assert(client->mode >= NBD_MODE_EXTENDED || + request->len <= UINT32_MAX); + if (request->contexts->count) { bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE; - int contexts_remaining = client->export_meta.count; + int contexts_remaining = request->contexts->count; - if (client->export_meta.base_allocation) { + if (!request->len) { + return nbd_send_generic_reply(client, request, -EINVAL, + "need non-zero length", errp); + } + if (request->contexts->base_allocation) { ret = nbd_co_send_block_status(client, request, exp->common.blk, request->from, @@ -2626,7 +2875,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, } } - if (client->export_meta.allocation_depth) { + if (request->contexts->allocation_depth) { ret = nbd_co_send_block_status(client, request, exp->common.blk, request->from, request->len, @@ -2639,8 +2888,9 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, } } + assert(request->contexts->exp == client->exp); for (i = 0; i < client->exp->nr_export_bitmaps; i++) { - if (!client->export_meta.bitmaps[i]) { + if (!request->contexts->bitmaps[i]) { continue; } ret = nbd_co_send_bitmap(client, request, @@ -2656,6 +2906,10 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, assert(!contexts_remaining); return 0; + } else if (client->contexts.count) { + return nbd_send_generic_reply(client, request, -EINVAL, + "CMD_BLOCK_STATUS payload not valid", + errp); } else { return nbd_send_generic_reply(client, request, -EINVAL, "CMD_BLOCK_STATUS not negotiated", @@ -2734,13 +2988,19 @@ static coroutine_fn void nbd_trip(void *opaque) } else { ret = nbd_handle_request(client, &request, req->data, &local_err); } + if (request.contexts && request.contexts != &client->contexts) { + assert(request.type == NBD_CMD_BLOCK_STATUS); + g_free(request.contexts->bitmaps); + g_free(request.contexts); + } if (ret < 0) { error_prepend(&local_err, "Failed to send reply: "); goto disconnect; } - /* We must disconnect after NBD_CMD_WRITE if we did not - * read the payload. + /* + * We must disconnect after NBD_CMD_WRITE or BLOCK_STATUS with + * payload if we did not read the payload. */ if (!req->complete) { error_setg(&local_err, "Request handling failed in intermediate state"); diff --git a/nbd/trace-events b/nbd/trace-events index f9dccfcfb4..00ae3216a1 100644 --- a/nbd/trace-events +++ b/nbd/trace-events @@ -33,7 +33,8 @@ nbd_client_clear_queue(void) "Clearing NBD queue" nbd_client_clear_socket(void) "Clearing NBD socket" nbd_send_request(uint64_t from, uint64_t len, uint64_t cookie, uint16_t flags, uint16_t type, const char *name) "Sending request to server: { .from = %" PRIu64", .len = %" PRIu64 ", .cookie = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) }" nbd_receive_simple_reply(int32_t error, const char *errname, uint64_t cookie) "Got simple reply: { .error = %" PRId32 " (%s), cookie = %" PRIu64" }" -nbd_receive_structured_reply_chunk(uint16_t flags, uint16_t type, const char *name, uint64_t cookie, uint32_t length) "Got structured reply chunk: { flags = 0x%" PRIx16 ", type = %d (%s), cookie = %" PRIu64 ", length = %" PRIu32 " }" +nbd_receive_reply_chunk_header(uint16_t flags, uint16_t type, const char *name, uint64_t cookie, uint32_t length) "Got reply chunk header: { flags = 0x%" PRIx16 ", type = %" PRIu16 " (%s), cookie = %" PRIu64 ", length = %" PRIu32 " }" +nbd_receive_wrong_header(uint32_t magic, const char *mode) "Server sent unexpected magic 0x%" PRIx32 " for negotiated mode %s" # common.c nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL" @@ -69,8 +70,10 @@ nbd_co_send_chunk_read(uint64_t cookie, uint64_t offset, void *data, uint64_t si nbd_co_send_chunk_read_hole(uint64_t cookie, uint64_t offset, uint64_t size) "Send structured read hole reply: cookie = %" PRIu64 ", offset = %" PRIu64 ", len = %" PRIu64 nbd_co_send_extents(uint64_t cookie, unsigned int extents, uint32_t id, uint64_t length, int last) "Send block status reply: cookie = %" PRIu64 ", extents = %u, context = %d (extents cover %" PRIu64 " bytes, last chunk = %d)" nbd_co_send_chunk_error(uint64_t cookie, int err, const char *errname, const char *msg) "Send structured error reply: cookie = %" PRIu64 ", error = %d (%s), msg = '%s'" +nbd_co_receive_block_status_payload_compliance(uint64_t from, uint64_t len) "client sent unusable block status payload: from=0x%" PRIx64 ", len=0x%" PRIx64 nbd_co_receive_request_decode_type(uint64_t cookie, uint16_t type, const char *name) "Decoding type: cookie = %" PRIu64 ", type = %" PRIu16 " (%s)" nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload received: cookie = %" PRIu64 ", len = %" PRIu64 +nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64 nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32 nbd_trip(void) "Reading request" diff --git a/qemu-nbd.c b/qemu-nbd.c index 54faa87a0c..186e6468b1 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -219,6 +219,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, [NBD_FLAG_SEND_RESIZE_BIT] = "resize", [NBD_FLAG_SEND_CACHE_BIT] = "cache", [NBD_FLAG_SEND_FAST_ZERO_BIT] = "fast-zero", + [NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT] = "block-status-payload", }; printf(" size: %" PRIu64 "\n", list[i].size); @@ -235,6 +236,9 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, printf(" opt block: %u\n", list[i].opt_block); printf(" max block: %u\n", list[i].max_block); } + printf(" transaction size: %s\n", + list[i].mode >= NBD_MODE_EXTENDED ? + "64-bit" : "32-bit"); if (list[i].n_contexts) { printf(" available meta contexts: %d\n", list[i].n_contexts); for (j = 0; j < list[i].n_contexts; j++) { diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out index 86a37014d0..e5e7f42caa 100644 --- a/tests/qemu-iotests/223.out +++ b/tests/qemu-iotests/223.out @@ -83,29 +83,32 @@ exports available: 0 exports available: 3 export: 'n' size: 4194304 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 + transaction size: 64-bit available meta contexts: 2 base:allocation qemu:dirty-bitmap:b export: 'n2' description: some text size: 4194304 - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 + transaction size: 64-bit available meta contexts: 2 base:allocation qemu:dirty-bitmap:b2 export: 'n3' size: 4194304 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 + transaction size: 64-bit available meta contexts: 2 base:allocation qemu:dirty-bitmap:b3 @@ -202,29 +205,32 @@ exports available: 0 exports available: 3 export: 'n' size: 4194304 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 + transaction size: 64-bit available meta contexts: 2 base:allocation qemu:dirty-bitmap:b export: 'n2' description: some text size: 4194304 - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 + transaction size: 64-bit available meta contexts: 2 base:allocation qemu:dirty-bitmap:b2 export: 'n3' size: 4194304 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 + transaction size: 64-bit available meta contexts: 2 base:allocation qemu:dirty-bitmap:b3 diff --git a/tests/qemu-iotests/233.out b/tests/qemu-iotests/233.out index 237c82767e..1910f7df20 100644 --- a/tests/qemu-iotests/233.out +++ b/tests/qemu-iotests/233.out @@ -39,6 +39,7 @@ exports available: 1 export: '' size: 67108864 min block: 1 + transaction size: 64-bit == check TLS fail over TCP with mismatched hostname == qemu-img: Could not open 'driver=nbd,host=localhost,port=PORT,tls-creds=tls0': Certificate does not match the hostname localhost @@ -53,6 +54,7 @@ exports available: 1 export: '' size: 67108864 min block: 1 + transaction size: 64-bit == check TLS with different CA fails == qemu-img: Could not open 'driver=nbd,host=127.0.0.1,port=PORT,tls-creds=tls0': The certificate hasn't got a known issuer @@ -83,6 +85,7 @@ exports available: 1 export: '' size: 67108864 min block: 1 + transaction size: 64-bit == check TLS works over UNIX with PSK == image: nbd+unix://?socket=SOCK_DIR/qemu-nbd.sock @@ -93,6 +96,7 @@ exports available: 1 export: '' size: 67108864 min block: 1 + transaction size: 64-bit == check TLS fails over UNIX with mismatch PSK == qemu-img: Could not open 'driver=nbd,path=SOCK_DIR/qemu-nbd.sock,tls-creds=tls0': TLS handshake failed: The TLS connection was non-properly terminated. diff --git a/tests/qemu-iotests/241.out b/tests/qemu-iotests/241.out index 7946c286d5..7267cd2997 100644 --- a/tests/qemu-iotests/241.out +++ b/tests/qemu-iotests/241.out @@ -6,6 +6,7 @@ exports available: 1 export: '' size: 1024 min block: 1 + transaction size: 64-bit [{ "start": 0, "length": 1000, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}, { "start": 1000, "length": 24, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}] 1 KiB (0x400) bytes allocated at offset 0 bytes (0x0) @@ -16,6 +17,7 @@ exports available: 1 export: '' size: 1024 min block: 512 + transaction size: 64-bit [{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}] 1 KiB (0x400) bytes allocated at offset 0 bytes (0x0) WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. @@ -28,6 +30,7 @@ exports available: 1 export: '' size: 1024 min block: 1 + transaction size: 64-bit [{ "start": 0, "length": 1000, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}, { "start": 1000, "length": 24, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}] 1 KiB (0x400) bytes allocated at offset 0 bytes (0x0) diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out index 390f05d1b7..f645f3315f 100644 --- a/tests/qemu-iotests/307.out +++ b/tests/qemu-iotests/307.out @@ -15,10 +15,11 @@ wrote 4096/4096 bytes at offset 0 exports available: 1 export: 'fmt' size: 67108864 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: XXX opt block: XXX max block: XXX + transaction size: 64-bit available meta contexts: 1 base:allocation @@ -43,10 +44,11 @@ exports available: 1 exports available: 1 export: 'fmt' size: 67108864 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: XXX opt block: XXX max block: XXX + transaction size: 64-bit available meta contexts: 1 base:allocation @@ -74,19 +76,21 @@ exports available: 1 exports available: 2 export: 'fmt' size: 67108864 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: XXX opt block: XXX max block: XXX + transaction size: 64-bit available meta contexts: 1 base:allocation export: 'export1' description: This is the writable second export size: 67108864 - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) min block: XXX opt block: XXX max block: XXX + transaction size: 64-bit available meta contexts: 1 base:allocation @@ -109,10 +113,11 @@ exports available: 1 export: 'export1' description: This is the writable second export size: 67108864 - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) min block: XXX opt block: XXX max block: XXX + transaction size: 64-bit available meta contexts: 1 base:allocation diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out index 138eb09c6d..56b57c69ed 100644 --- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out +++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out @@ -17,10 +17,11 @@ wrote 2097152/2097152 bytes at offset 1048576 exports available: 1 export: '' size: 4194304 - flags: 0x48f ( readonly flush fua df cache ) + flags: 0x148f ( readonly flush fua df cache block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 + transaction size: 64-bit available meta contexts: 2 base:allocation qemu:allocation-depth