From dd3e8ac413a74a58d6a3ba16a26952f84370fcff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 12 Mar 2012 15:23:13 +0100 Subject: [PATCH 1/7] nbd: avoid out of bounds access to recv_coroutine array This can happen with a buggy or malicious server. Reported-by: Michael Tokarev Signed-off-by: Paolo Bonzini --- block/nbd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/nbd.c b/block/nbd.c index 161b299855..9972cdb655 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -150,7 +150,7 @@ static int nbd_have_request(void *opaque) static void nbd_reply_ready(void *opaque) { BDRVNBDState *s = opaque; - int i; + uint64_t i; if (s->reply.handle == 0) { /* No reply already in flight. Fetch a header. */ @@ -164,6 +164,10 @@ static void nbd_reply_ready(void *opaque) * handler acts as a synchronization point and ensures that only * one coroutine is called until the reply finishes. */ i = HANDLE_TO_INDEX(s, s->reply.handle); + if (i >= MAX_NBD_REQUESTS) { + goto fail; + } + if (s->recv_coroutine[i]) { qemu_coroutine_enter(s->recv_coroutine[i], NULL); return; From 94e7340b5db8bce7866e44e700ffa8fd26585c7e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 7 Mar 2012 11:25:01 +0100 Subject: [PATCH 2/7] nbd: consistently use ssize_t GCC (pedantically, but correctly) considers that a negative ssize_t may become positive when casted to int. This may cause uninitialized variable warnings when a function returns such a negative ssize_t and is inlined. Propagate ssize_t return types to avoid this. Signed-off-by: Paolo Bonzini --- nbd.c | 22 ++++++++++------------ nbd.h | 4 ++-- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/nbd.c b/nbd.c index 406e555bc6..4a5849c91f 100644 --- a/nbd.c +++ b/nbd.c @@ -470,7 +470,7 @@ int nbd_client(int fd) } #endif -int nbd_send_request(int csock, struct nbd_request *request) +ssize_t nbd_send_request(int csock, struct nbd_request *request) { uint8_t buf[4 + 4 + 8 + 8 + 4]; @@ -492,7 +492,7 @@ int nbd_send_request(int csock, struct nbd_request *request) return 0; } -static int nbd_receive_request(int csock, struct nbd_request *request) +static ssize_t nbd_receive_request(int csock, struct nbd_request *request) { uint8_t buf[4 + 4 + 8 + 8 + 4]; uint32_t magic; @@ -529,13 +529,11 @@ static int nbd_receive_request(int csock, struct nbd_request *request) return 0; } -int nbd_receive_reply(int csock, struct nbd_reply *reply) +ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply) { uint8_t buf[NBD_REPLY_SIZE]; uint32_t magic; - memset(buf, 0xAA, sizeof(buf)); - if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { LOG("read failed"); errno = EINVAL; @@ -564,7 +562,7 @@ int nbd_receive_reply(int csock, struct nbd_reply *reply) return 0; } -static int nbd_send_reply(int csock, struct nbd_reply *reply) +static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply) { uint8_t buf[4 + 4 + 8]; @@ -702,12 +700,12 @@ static int nbd_can_read(void *opaque); static void nbd_read(void *opaque); static void nbd_restart_write(void *opaque); -static int nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, - int len) +static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, + int len) { NBDClient *client = req->client; int csock = client->sock; - int rc, ret; + ssize_t rc, ret; qemu_co_mutex_lock(&client->send_lock); qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, @@ -741,11 +739,11 @@ static int nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, return rc; } -static int nbd_co_receive_request(NBDRequest *req, struct nbd_request *request) +static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request) { NBDClient *client = req->client; int csock = client->sock; - int rc; + ssize_t rc; client->recv_coroutine = qemu_coroutine_self(); if (nbd_receive_request(csock, request) == -1) { @@ -792,7 +790,7 @@ static void nbd_trip(void *opaque) NBDExport *exp = client->exp; struct nbd_request request; struct nbd_reply reply; - int ret; + ssize_t ret; TRACE("Reading request."); diff --git a/nbd.h b/nbd.h index a8382f096c..217a82d80d 100644 --- a/nbd.h +++ b/nbd.h @@ -70,8 +70,8 @@ int unix_socket_incoming(const char *path); int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, off_t *size, size_t *blocksize); int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize); -int nbd_send_request(int csock, struct nbd_request *request); -int nbd_receive_reply(int csock, struct nbd_reply *reply); +ssize_t nbd_send_request(int csock, struct nbd_request *request); +ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply); int nbd_client(int fd); int nbd_disconnect(int fd); From fc19f8a02e45c4d8ad24dd7eb374330b03dfc28e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 7 Mar 2012 11:05:34 +0100 Subject: [PATCH 3/7] nbd: consistently check for <0 or >=0 This prepares for the following patch, which changes -1 return values to negative errno. Signed-off-by: Paolo Bonzini --- block/nbd.c | 22 +++++++++++++++------- nbd.c | 48 +++++++++++++++++++++++++----------------------- qemu-nbd.c | 26 +++++++++++++------------- 3 files changed, 53 insertions(+), 43 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 9972cdb655..25e52689d1 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -197,7 +197,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, nbd_have_request, NULL, s); rc = nbd_send_request(s->sock, request); - if (rc != -1 && iov) { + if (rc >= 0 && iov) { ret = qemu_co_sendv(s->sock, iov, request->len, offset); if (ret != request->len) { errno = -EIO; @@ -260,7 +260,7 @@ static int nbd_establish_connection(BlockDriverState *bs) } /* Failed to establish connection */ - if (sock == -1) { + if (sock < 0) { logout("Failed to establish connection to NBD server\n"); return -errno; } @@ -268,7 +268,7 @@ static int nbd_establish_connection(BlockDriverState *bs) /* NBD handshake */ ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size, &blocksize); - if (ret == -1) { + if (ret < 0) { logout("Failed to negotiate with the NBD server\n"); closesocket(sock); return -errno; @@ -331,13 +331,15 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, BDRVNBDState *s = bs->opaque; struct nbd_request request; struct nbd_reply reply; + ssize_t ret; request.type = NBD_CMD_READ; request.from = sector_num * 512; request.len = nb_sectors * 512; nbd_coroutine_start(s, &request); - if (nbd_co_send_request(s, &request, NULL, 0) == -1) { + ret = nbd_co_send_request(s, &request, NULL, 0); + if (ret < 0) { reply.error = errno; } else { nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset); @@ -354,6 +356,7 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, BDRVNBDState *s = bs->opaque; struct nbd_request request; struct nbd_reply reply; + ssize_t ret; request.type = NBD_CMD_WRITE; if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) { @@ -364,7 +367,8 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, request.len = nb_sectors * 512; nbd_coroutine_start(s, &request); - if (nbd_co_send_request(s, &request, qiov->iov, offset) == -1) { + ret = nbd_co_send_request(s, &request, qiov->iov, offset); + if (ret < 0) { reply.error = errno; } else { nbd_co_receive_reply(s, &request, &reply, NULL, 0); @@ -416,6 +420,7 @@ static int nbd_co_flush(BlockDriverState *bs) BDRVNBDState *s = bs->opaque; struct nbd_request request; struct nbd_reply reply; + ssize_t ret; if (!(s->nbdflags & NBD_FLAG_SEND_FLUSH)) { return 0; @@ -430,7 +435,8 @@ static int nbd_co_flush(BlockDriverState *bs) request.len = 0; nbd_coroutine_start(s, &request); - if (nbd_co_send_request(s, &request, NULL, 0) == -1) { + ret = nbd_co_send_request(s, &request, NULL, 0); + if (ret < 0) { reply.error = errno; } else { nbd_co_receive_reply(s, &request, &reply, NULL, 0); @@ -445,6 +451,7 @@ static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num, BDRVNBDState *s = bs->opaque; struct nbd_request request; struct nbd_reply reply; + ssize_t ret; if (!(s->nbdflags & NBD_FLAG_SEND_TRIM)) { return 0; @@ -454,7 +461,8 @@ static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num, request.len = nb_sectors * 512; nbd_coroutine_start(s, &request); - if (nbd_co_send_request(s, &request, NULL, 0) == -1) { + ret = nbd_co_send_request(s, &request, NULL, 0); + if (ret < 0) { reply.error = errno; } else { nbd_co_receive_reply(s, &request, &reply, NULL, 0); diff --git a/nbd.c b/nbd.c index 4a5849c91f..9832aa02c5 100644 --- a/nbd.c +++ b/nbd.c @@ -102,12 +102,16 @@ size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) len = send(fd, buffer + offset, size - offset, 0); } - if (len == -1) + if (len < 0) { errno = socket_error(); - /* recoverable error */ - if (len == -1 && (errno == EAGAIN || errno == EINTR)) { - continue; + /* recoverable error */ + if (errno == EINTR || errno == EAGAIN) { + continue; + } + + /* unrecoverable error */ + return 0; } /* eof */ @@ -115,11 +119,6 @@ size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) break; } - /* unrecoverable error */ - if (len == -1) { - return 0; - } - offset += len; } @@ -364,7 +363,7 @@ int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize) { TRACE("Setting NBD socket"); - if (ioctl(fd, NBD_SET_SOCK, csock) == -1) { + if (ioctl(fd, NBD_SET_SOCK, csock) < 0) { int serrno = errno; LOG("Failed to set NBD socket"); errno = serrno; @@ -373,7 +372,7 @@ int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize) TRACE("Setting block size to %lu", (unsigned long)blocksize); - if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) == -1) { + if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) < 0) { int serrno = errno; LOG("Failed setting NBD block size"); errno = serrno; @@ -382,7 +381,7 @@ int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize) TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize)); - if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) == -1) { + if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) < 0) { int serrno = errno; LOG("Failed setting size (in blocks)"); errno = serrno; @@ -430,7 +429,7 @@ int nbd_client(int fd) TRACE("Doing NBD loop"); ret = ioctl(fd, NBD_DO_IT); - if (ret == -1 && errno == EPIPE) { + if (ret < 0 && errno == EPIPE) { /* NBD_DO_IT normally returns EPIPE when someone has disconnected * the socket via NBD_DISCONNECT. We do not want to return 1 in * that case. @@ -714,20 +713,20 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, if (!len) { rc = nbd_send_reply(csock, reply); - if (rc == -1) { + if (rc < 0) { rc = -errno; } } else { socket_set_cork(csock, 1); rc = nbd_send_reply(csock, reply); - if (rc != -1) { + if (rc >= 0) { ret = qemu_co_send(csock, req->data, len); if (ret != len) { errno = EIO; rc = -1; } } - if (rc == -1) { + if (rc < 0) { rc = -errno; } socket_set_cork(csock, 0); @@ -746,7 +745,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque ssize_t rc; client->recv_coroutine = qemu_coroutine_self(); - if (nbd_receive_request(csock, request) == -1) { + if (nbd_receive_request(csock, request) < 0) { rc = -EIO; goto out; } @@ -860,8 +859,9 @@ static void nbd_trip(void *opaque) } } - if (nbd_co_send_reply(req, &reply, 0) < 0) + if (nbd_co_send_reply(req, &reply, 0) < 0) { goto out; + } break; case NBD_CMD_DISC: TRACE("Request type is DISCONNECT"); @@ -875,9 +875,9 @@ static void nbd_trip(void *opaque) LOG("flush failed"); reply.error = -ret; } - - if (nbd_co_send_reply(req, &reply, 0) < 0) + if (nbd_co_send_reply(req, &reply, 0) < 0) { goto out; + } break; case NBD_CMD_TRIM: TRACE("Request type is TRIM"); @@ -887,16 +887,18 @@ static void nbd_trip(void *opaque) LOG("discard failed"); reply.error = -ret; } - if (nbd_co_send_reply(req, &reply, 0) < 0) + if (nbd_co_send_reply(req, &reply, 0) < 0) { goto out; + } break; default: LOG("invalid request type (%u) received", request.type); invalid_request: reply.error = -EINVAL; error_reply: - if (nbd_co_send_reply(req, &reply, 0) == -1) + if (nbd_co_send_reply(req, &reply, 0) < 0) { goto out; + } break; } @@ -939,7 +941,7 @@ NBDClient *nbd_client_new(NBDExport *exp, int csock, void (*close)(NBDClient *)) { NBDClient *client; - if (nbd_send_negotiate(csock, exp->size, exp->nbdflags) == -1) { + if (nbd_send_negotiate(csock, exp->size, exp->nbdflags) < 0) { return NULL; } client = g_malloc0(sizeof(NBDClient)); diff --git a/qemu-nbd.c b/qemu-nbd.c index d4e70410fc..19cfb04d96 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -186,7 +186,7 @@ static void *show_parts(void *arg) * modprobe nbd max_part=63 */ nbd = open(device, O_RDWR); - if (nbd != -1) { + if (nbd >= 0) { close(nbd); } return NULL; @@ -203,25 +203,25 @@ static void *nbd_client_thread(void *arg) pthread_t show_parts_thread; sock = unix_socket_outgoing(sockpath); - if (sock == -1) { + if (sock < 0) { goto out; } ret = nbd_receive_negotiate(sock, NULL, &nbdflags, &size, &blocksize); - if (ret == -1) { + if (ret < 0) { goto out; } fd = open(device, O_RDWR); - if (fd == -1) { + if (fd < 0) { /* Linux-only, we can use %m in printf. */ fprintf(stderr, "Failed to open %s: %m", device); goto out; } ret = nbd_init(fd, sock, nbdflags, size, blocksize); - if (ret == -1) { + if (ret < 0) { goto out; } @@ -268,7 +268,7 @@ static void nbd_accept(void *opaque) int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len); nbd_started = true; - if (fd != -1 && nbd_client_new(exp, fd, nbd_client_closed)) { + if (fd >= 0 && nbd_client_new(exp, fd, nbd_client_closed)) { nb_fds++; } } @@ -410,9 +410,9 @@ int main(int argc, char **argv) if (disconnect) { fd = open(argv[optind], O_RDWR); - if (fd == -1) + if (fd < 0) { err(EXIT_FAILURE, "Cannot open %s", argv[optind]); - + } nbd_disconnect(fd); close(fd); @@ -427,7 +427,7 @@ int main(int argc, char **argv) pid_t pid; int ret; - if (qemu_pipe(stderr_fd) == -1) { + if (qemu_pipe(stderr_fd) < 0) { err(EXIT_FAILURE, "Error setting up communication pipe"); } @@ -441,7 +441,7 @@ int main(int argc, char **argv) /* Temporarily redirect stderr to the parent's pipe... */ dup2(stderr_fd[1], STDERR_FILENO); - if (ret == -1) { + if (ret < 0) { err(EXIT_FAILURE, "Failed to daemonize"); } @@ -459,11 +459,11 @@ int main(int argc, char **argv) while ((ret = read(stderr_fd[0], buf, 1024)) > 0) { errors = true; ret = qemu_write_full(STDERR_FILENO, buf, ret); - if (ret == -1) { + if (ret < 0) { exit(EXIT_FAILURE); } } - if (ret == -1) { + if (ret < 0) { err(EXIT_FAILURE, "Cannot read from daemon"); } @@ -504,7 +504,7 @@ int main(int argc, char **argv) fd = tcp_socket_incoming(bindto, port); } - if (fd == -1) { + if (fd < 0) { return 1; } From 185b43386ad999c80bdc58e41b87f05e5b3e8463 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 5 Mar 2012 08:56:10 +0100 Subject: [PATCH 4/7] nbd: consistently return negative errno values In the next patch we need to look at the return code of nbd_wr_sync. To avoid percolating the socket_error() ugliness all around, let's handle errors by returning negative errno values. Signed-off-by: Paolo Bonzini --- block/nbd.c | 13 ++--- nbd.c | 159 ++++++++++++++++++++++++++-------------------------- nbd.h | 2 +- qemu-nbd.c | 15 ++--- 4 files changed, 93 insertions(+), 96 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 25e52689d1..1b0e384c39 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -200,8 +200,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, if (rc >= 0 && iov) { ret = qemu_co_sendv(s->sock, iov, request->len, offset); if (ret != request->len) { - errno = -EIO; - rc = -1; + return -EIO; } } qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, @@ -271,7 +270,7 @@ static int nbd_establish_connection(BlockDriverState *bs) if (ret < 0) { logout("Failed to negotiate with the NBD server\n"); closesocket(sock); - return -errno; + return ret; } /* Now that we're connected, set the socket to be non-blocking and @@ -340,7 +339,7 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, nbd_coroutine_start(s, &request); ret = nbd_co_send_request(s, &request, NULL, 0); if (ret < 0) { - reply.error = errno; + reply.error = -ret; } else { nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset); } @@ -369,7 +368,7 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, nbd_coroutine_start(s, &request); ret = nbd_co_send_request(s, &request, qiov->iov, offset); if (ret < 0) { - reply.error = errno; + reply.error = -ret; } else { nbd_co_receive_reply(s, &request, &reply, NULL, 0); } @@ -437,7 +436,7 @@ static int nbd_co_flush(BlockDriverState *bs) nbd_coroutine_start(s, &request); ret = nbd_co_send_request(s, &request, NULL, 0); if (ret < 0) { - reply.error = errno; + reply.error = -ret; } else { nbd_co_receive_reply(s, &request, &reply, NULL, 0); } @@ -463,7 +462,7 @@ static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num, nbd_coroutine_start(s, &request); ret = nbd_co_send_request(s, &request, NULL, 0); if (ret < 0) { - reply.error = errno; + reply.error = -ret; } else { nbd_co_receive_reply(s, &request, &reply, NULL, 0); } diff --git a/nbd.c b/nbd.c index 9832aa02c5..b31b3b2a4f 100644 --- a/nbd.c +++ b/nbd.c @@ -81,9 +81,10 @@ #define read_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, true) #define write_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, false) -size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) +ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) { size_t offset = 0; + int err; if (qemu_in_coroutine()) { if (do_read) { @@ -103,15 +104,15 @@ size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) } if (len < 0) { - errno = socket_error(); + err = socket_error(); /* recoverable error */ - if (errno == EINTR || errno == EAGAIN) { + if (err == EINTR || err == EAGAIN) { continue; } /* unrecoverable error */ - return 0; + return -err; } /* eof */ @@ -192,6 +193,7 @@ int unix_socket_outgoing(const char *path) static int nbd_send_negotiate(int csock, off_t size, uint32_t flags) { char buf[8 + 8 + 8 + 128]; + int rc; /* Negotiate [ 0 .. 7] passwd ("NBDMAGIC") @@ -201,6 +203,8 @@ static int nbd_send_negotiate(int csock, off_t size, uint32_t flags) [28 .. 151] reserved (0) */ + rc = -EINVAL; + TRACE("Beginning negotiation."); memcpy(buf, "NBDMAGIC", 8); cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL); @@ -212,13 +216,13 @@ static int nbd_send_negotiate(int csock, off_t size, uint32_t flags) if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { LOG("write failed"); - errno = EINVAL; - return -1; + goto fail; } TRACE("Negotiation succeeded."); - - return 0; + rc = 0; +fail: + return rc; } int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, @@ -227,20 +231,21 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, char buf[256]; uint64_t magic, s; uint16_t tmp; + int rc; TRACE("Receiving negotiation."); + rc = -EINVAL; + if (read_sync(csock, buf, 8) != 8) { LOG("read failed"); - errno = EINVAL; - return -1; + goto fail; } buf[8] = '\0'; if (strlen(buf) == 0) { LOG("server connection closed"); - errno = EINVAL; - return -1; + goto fail; } TRACE("Magic is %c%c%c%c%c%c%c%c", @@ -255,14 +260,12 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, if (memcmp(buf, "NBDMAGIC", 8) != 0) { LOG("Invalid magic received"); - errno = EINVAL; - return -1; + goto fail; } if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { LOG("read failed"); - errno = EINVAL; - return -1; + goto fail; } magic = be64_to_cpu(magic); TRACE("Magic is 0x%" PRIx64, magic); @@ -275,61 +278,52 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, TRACE("Checking magic (opts_magic)"); if (magic != 0x49484156454F5054LL) { LOG("Bad magic received"); - errno = EINVAL; - return -1; + goto fail; } if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { LOG("flags read failed"); - errno = EINVAL; - return -1; + goto fail; } *flags = be16_to_cpu(tmp) << 16; /* reserved for future use */ if (write_sync(csock, &reserved, sizeof(reserved)) != sizeof(reserved)) { LOG("write failed (reserved)"); - errno = EINVAL; - return -1; + goto fail; } /* write the export name */ magic = cpu_to_be64(magic); if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { LOG("write failed (magic)"); - errno = EINVAL; - return -1; + goto fail; } opt = cpu_to_be32(NBD_OPT_EXPORT_NAME); if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) { LOG("write failed (opt)"); - errno = EINVAL; - return -1; + goto fail; } namesize = cpu_to_be32(strlen(name)); if (write_sync(csock, &namesize, sizeof(namesize)) != sizeof(namesize)) { LOG("write failed (namesize)"); - errno = EINVAL; - return -1; + goto fail; } if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) { LOG("write failed (name)"); - errno = EINVAL; - return -1; + goto fail; } } else { TRACE("Checking magic (cli_magic)"); if (magic != 0x00420281861253LL) { LOG("Bad magic received"); - errno = EINVAL; - return -1; + goto fail; } } if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) { LOG("read failed"); - errno = EINVAL; - return -1; + goto fail; } *size = be64_to_cpu(s); *blocksize = 1024; @@ -338,24 +332,24 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, if (!name) { if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) { LOG("read failed (flags)"); - errno = EINVAL; - return -1; + goto fail; } *flags = be32_to_cpup(flags); } else { if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { LOG("read failed (tmp)"); - errno = EINVAL; - return -1; + goto fail; } *flags |= be32_to_cpu(tmp); } if (read_sync(csock, &buf, 124) != 124) { LOG("read failed (buf)"); - errno = EINVAL; - return -1; + goto fail; } - return 0; + rc = 0; + +fail: + return rc; } #ifdef __linux__ @@ -366,8 +360,7 @@ int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize) if (ioctl(fd, NBD_SET_SOCK, csock) < 0) { int serrno = errno; LOG("Failed to set NBD socket"); - errno = serrno; - return -1; + return -serrno; } TRACE("Setting block size to %lu", (unsigned long)blocksize); @@ -375,8 +368,7 @@ int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize) if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) < 0) { int serrno = errno; LOG("Failed setting NBD block size"); - errno = serrno; - return -1; + return -serrno; } TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize)); @@ -384,8 +376,7 @@ int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize) if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) < 0) { int serrno = errno; LOG("Failed setting size (in blocks)"); - errno = serrno; - return -1; + return -serrno; } if (flags & NBD_FLAG_READ_ONLY) { @@ -395,8 +386,7 @@ int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize) if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) { int serrno = errno; LOG("Failed setting read-only attribute"); - errno = serrno; - return -1; + return -serrno; } } @@ -404,8 +394,7 @@ int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize) && errno != ENOTTY) { int serrno = errno; LOG("Failed setting flags"); - errno = serrno; - return -1; + return -serrno; } TRACE("Negotiation ended"); @@ -452,26 +441,24 @@ int nbd_client(int fd) #else int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize) { - errno = ENOTSUP; - return -1; + return -ENOTSUP; } int nbd_disconnect(int fd) { - errno = ENOTSUP; - return -1; + return -ENOTSUP; } int nbd_client(int fd) { - errno = ENOTSUP; - return -1; + return -ENOTSUP; } #endif ssize_t nbd_send_request(int csock, struct nbd_request *request) { uint8_t buf[4 + 4 + 8 + 8 + 4]; + ssize_t ret; cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC); cpu_to_be32w((uint32_t*)(buf + 4), request->type); @@ -483,10 +470,14 @@ ssize_t nbd_send_request(int csock, struct nbd_request *request) "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}", request->from, request->len, request->handle, request->type); - if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { + ret = write_sync(csock, buf, sizeof(buf)); + if (ret < 0) { + return ret; + } + + if (ret != sizeof(buf)) { LOG("writing to socket failed"); - errno = EINVAL; - return -1; + return -EINVAL; } return 0; } @@ -495,11 +486,16 @@ static ssize_t nbd_receive_request(int csock, struct nbd_request *request) { uint8_t buf[4 + 4 + 8 + 8 + 4]; uint32_t magic; + ssize_t ret; - if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { + ret = read_sync(csock, buf, sizeof(buf)); + if (ret < 0) { + return ret; + } + + if (ret != sizeof(buf)) { LOG("read failed"); - errno = EINVAL; - return -1; + return -EINVAL; } /* Request @@ -522,8 +518,7 @@ static ssize_t nbd_receive_request(int csock, struct nbd_request *request) if (magic != NBD_REQUEST_MAGIC) { LOG("invalid magic (got 0x%x)", magic); - errno = EINVAL; - return -1; + return -EINVAL; } return 0; } @@ -532,11 +527,16 @@ ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply) { uint8_t buf[NBD_REPLY_SIZE]; uint32_t magic; + ssize_t ret; - if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { + ret = read_sync(csock, buf, sizeof(buf)); + if (ret < 0) { + return ret; + } + + if (ret != sizeof(buf)) { LOG("read failed"); - errno = EINVAL; - return -1; + return -EINVAL; } /* Reply @@ -555,8 +555,7 @@ ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply) if (magic != NBD_REPLY_MAGIC) { LOG("invalid magic (got 0x%x)", magic); - errno = EINVAL; - return -1; + return -EINVAL; } return 0; } @@ -564,6 +563,7 @@ ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply) static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply) { uint8_t buf[4 + 4 + 8]; + ssize_t ret; /* Reply [ 0 .. 3] magic (NBD_REPLY_MAGIC) @@ -576,10 +576,14 @@ static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply) TRACE("Sending response to client"); - if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { + ret = write_sync(csock, buf, sizeof(buf)); + if (ret < 0) { + return ret; + } + + if (ret != sizeof(buf)) { LOG("writing to socket failed"); - errno = EINVAL; - return -1; + return -EINVAL; } return 0; } @@ -713,22 +717,15 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, if (!len) { rc = nbd_send_reply(csock, reply); - if (rc < 0) { - rc = -errno; - } } else { socket_set_cork(csock, 1); rc = nbd_send_reply(csock, reply); if (rc >= 0) { ret = qemu_co_send(csock, req->data, len); if (ret != len) { - errno = EIO; - rc = -1; + rc = -EIO; } } - if (rc < 0) { - rc = -errno; - } socket_set_cork(csock, 0); } diff --git a/nbd.h b/nbd.h index 217a82d80d..40d58d359f 100644 --- a/nbd.h +++ b/nbd.h @@ -59,7 +59,7 @@ enum { #define NBD_BUFFER_SIZE (1024*1024) -size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read); +ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read); int tcp_socket_outgoing(const char *address, uint16_t port); int tcp_socket_incoming(const char *address, uint16_t port); int tcp_socket_outgoing_spec(const char *address_and_port); diff --git a/qemu-nbd.c b/qemu-nbd.c index 19cfb04d96..6c3f9b5de5 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -126,8 +126,7 @@ static int find_partition(BlockDriverState *bs, int partition, } if (data[510] != 0x55 || data[511] != 0xaa) { - errno = -EINVAL; - return -1; + return -EINVAL; } for (i = 0; i < 4; i++) { @@ -165,8 +164,7 @@ static int find_partition(BlockDriverState *bs, int partition, } } - errno = -ENOENT; - return -1; + return -ENOENT; } static void termsig_handler(int signum) @@ -491,9 +489,12 @@ int main(int argc, char **argv) fd_size = bs->total_sectors * 512; - if (partition != -1 && - find_partition(bs, partition, &dev_offset, &fd_size)) { - err(EXIT_FAILURE, "Could not find partition %d", partition); + if (partition != -1) { + ret = find_partition(bs, partition, &dev_offset, &fd_size); + if (ret < 0) { + errno = -ret; + err(EXIT_FAILURE, "Could not find partition %d", partition); + } } exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags); From 7fe7b68b32ba609faeeee03556aac0eb1b187c91 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 5 Mar 2012 09:10:35 +0100 Subject: [PATCH 5/7] nbd: do not block in nbd_wr_sync if no data at all is available Right now, nbd_wr_sync will hang if no data at all is available on the socket and the other side is not going to provide any. Relax this by making it loop only for writes or partial reads. This fixes a race where one thread is executing qemu_aio_wait() and another is executing main_loop_wait(). Then, the select() call in main_loop_wait() can return stale data and call the "readable" callback with no data in the socket. Reported-by: Laurent Vivier Signed-off-by: Paolo Bonzini --- block/nbd.c | 12 ++++++++++-- nbd.c | 40 ++++++++++++++++++++++++++++++++++------ 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 1b0e384c39..e0af5b4725 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -151,10 +151,18 @@ static void nbd_reply_ready(void *opaque) { BDRVNBDState *s = opaque; uint64_t i; + int ret; if (s->reply.handle == 0) { - /* No reply already in flight. Fetch a header. */ - if (nbd_receive_reply(s->sock, &s->reply) < 0) { + /* No reply already in flight. Fetch a header. It is possible + * that another thread has done the same thing in parallel, so + * the socket is not readable anymore. + */ + ret = nbd_receive_reply(s->sock, &s->reply); + if (ret == -EAGAIN) { + return; + } + if (ret < 0) { s->reply.handle = 0; goto fail; } diff --git a/nbd.c b/nbd.c index b31b3b2a4f..4c6d7f193d 100644 --- a/nbd.c +++ b/nbd.c @@ -78,9 +78,6 @@ /* That's all folks */ -#define read_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, true) -#define write_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, false) - ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) { size_t offset = 0; @@ -107,7 +104,7 @@ ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) err = socket_error(); /* recoverable error */ - if (err == EINTR || err == EAGAIN) { + if (err == EINTR || (offset > 0 && err == EAGAIN)) { continue; } @@ -126,6 +123,26 @@ ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) return offset; } +static ssize_t read_sync(int fd, void *buffer, size_t size) +{ + /* Sockets are kept in blocking mode in the negotiation phase. After + * that, a non-readable socket simply means that another thread stole + * our request/reply. Synchronization is done with recv_coroutine, so + * that this is coroutine-safe. + */ + return nbd_wr_sync(fd, buffer, size, true); +} + +static ssize_t write_sync(int fd, void *buffer, size_t size) +{ + int ret; + do { + /* For writes, we do expect the socket to be writable. */ + ret = nbd_wr_sync(fd, buffer, size, false); + } while (ret == -EAGAIN); + return ret; +} + static void combine_addr(char *buf, size_t len, const char* address, uint16_t port) { @@ -203,6 +220,7 @@ static int nbd_send_negotiate(int csock, off_t size, uint32_t flags) [28 .. 151] reserved (0) */ + socket_set_block(csock); rc = -EINVAL; TRACE("Beginning negotiation."); @@ -222,6 +240,7 @@ static int nbd_send_negotiate(int csock, off_t size, uint32_t flags) TRACE("Negotiation succeeded."); rc = 0; fail: + socket_set_nonblock(csock); return rc; } @@ -235,6 +254,7 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, TRACE("Receiving negotiation."); + socket_set_block(csock); rc = -EINVAL; if (read_sync(csock, buf, 8) != 8) { @@ -349,6 +369,7 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, rc = 0; fail: + socket_set_nonblock(csock); return rc; } @@ -742,8 +763,11 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque ssize_t rc; client->recv_coroutine = qemu_coroutine_self(); - if (nbd_receive_request(csock, request) < 0) { - rc = -EIO; + rc = nbd_receive_request(csock, request); + if (rc < 0) { + if (rc != -EAGAIN) { + rc = -EIO; + } goto out; } @@ -791,6 +815,9 @@ static void nbd_trip(void *opaque) TRACE("Reading request."); ret = nbd_co_receive_request(req, &request); + if (ret == -EAGAIN) { + goto done; + } if (ret == -EIO) { goto out; } @@ -901,6 +928,7 @@ static void nbd_trip(void *opaque) TRACE("Request/Reply complete"); +done: nbd_request_put(req); return; From 38ceff0412621a35fa618fb46cd7f0f6ff037c13 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 12 Mar 2012 16:17:27 +0100 Subject: [PATCH 6/7] nbd: do not include block_int.h Signed-off-by: Paolo Bonzini --- nbd.c | 3 +-- qemu-nbd.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/nbd.c b/nbd.c index 4c6d7f193d..d4dafc6019 100644 --- a/nbd.c +++ b/nbd.c @@ -18,7 +18,6 @@ #include "nbd.h" #include "block.h" -#include "block_int.h" #include "qemu-coroutine.h" @@ -703,7 +702,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, exp->bs = bs; exp->dev_offset = dev_offset; exp->nbdflags = nbdflags; - exp->size = size == -1 ? exp->bs->total_sectors * 512 : size; + exp->size = size == -1 ? bdrv_getlength(bs) : size; return exp; } diff --git a/qemu-nbd.c b/qemu-nbd.c index 6c3f9b5de5..5a0300eb07 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -17,7 +17,7 @@ */ #include "qemu-common.h" -#include "block_int.h" +#include "block.h" #include "nbd.h" #include @@ -487,7 +487,7 @@ int main(int argc, char **argv) err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]); } - fd_size = bs->total_sectors * 512; + fd_size = bdrv_getlength(bs); if (partition != -1) { ret = find_partition(bs, partition, &dev_offset, &fd_size); From e25ceb76e5b90ec4197cbe1cb859200bf6ee1284 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 19 Apr 2012 11:59:11 +0200 Subject: [PATCH 7/7] nbd: obey FUA on reads Signed-off-by: Paolo Bonzini --- nbd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/nbd.c b/nbd.c index d4dafc6019..153709f628 100644 --- a/nbd.c +++ b/nbd.c @@ -842,6 +842,15 @@ static void nbd_trip(void *opaque) case NBD_CMD_READ: TRACE("Request type is READ"); + if (request.type & NBD_CMD_FLAG_FUA) { + ret = bdrv_co_flush(exp->bs); + if (ret < 0) { + LOG("flush failed"); + reply.error = -ret; + goto error_reply; + } + } + ret = bdrv_read(exp->bs, (request.from + exp->dev_offset) / 512, req->data, request.len / 512); if (ret < 0) {