From 637bc5a5d8b7612bbbccd998d4183d2affb189e6 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Wed, 8 May 2019 23:18:16 +0200
Subject: [PATCH 1/9] qemu-nbd: Add --pid-file option

--fork is a bit boring if there is no way to get the child's PID.  This
option helps.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20190508211820.17851-2-mreitz@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 qemu-nbd.c    | 11 +++++++++++
 qemu-nbd.texi |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index e24dd2f767..99377a3f14 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -61,6 +61,7 @@
 #define QEMU_NBD_OPT_IMAGE_OPTS    262
 #define QEMU_NBD_OPT_FORK          263
 #define QEMU_NBD_OPT_TLSAUTHZ      264
+#define QEMU_NBD_OPT_PID_FILE      265
 
 #define MBR_SIZE 512
 
@@ -113,6 +114,7 @@ static void usage(const char *name)
 "                            specify tracing options\n"
 "  --fork                    fork off the server process and exit the parent\n"
 "                            once the server is running\n"
+"  --pid-file=PATH           store the server's process ID in the given file\n"
 #if HAVE_NBD_DEVICE
 "\n"
 "Kernel NBD client support:\n"
@@ -641,6 +643,7 @@ int main(int argc, char **argv)
         { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
         { "trace", required_argument, NULL, 'T' },
         { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK },
+        { "pid-file", required_argument, NULL, QEMU_NBD_OPT_PID_FILE },
         { NULL, 0, NULL, 0 }
     };
     int ch;
@@ -667,6 +670,7 @@ int main(int argc, char **argv)
     bool list = false;
     int old_stderr = -1;
     unsigned socket_activation;
+    const char *pid_file_name = NULL;
 
     /* The client thread uses SIGTERM to interrupt the server.  A signal
      * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
@@ -866,6 +870,9 @@ int main(int argc, char **argv)
         case 'L':
             list = true;
             break;
+        case QEMU_NBD_OPT_PID_FILE:
+            pid_file_name = optarg;
+            break;
         }
     }
 
@@ -1186,6 +1193,10 @@ int main(int argc, char **argv)
 
     nbd_update_server_watch();
 
+    if (pid_file_name) {
+        qemu_write_pidfile(pid_file_name, &error_fatal);
+    }
+
     /* now when the initialization is (almost) complete, chdir("/")
      * to free any busy filesystems */
     if (chdir("/") < 0) {
diff --git a/qemu-nbd.texi b/qemu-nbd.texi
index de342c76b8..7f55657722 100644
--- a/qemu-nbd.texi
+++ b/qemu-nbd.texi
@@ -117,6 +117,8 @@ option; or provide the credentials needed for connecting as a client
 in list mode.
 @item --fork
 Fork off the server process and exit the parent once the server is running.
+@item --pid-file=PATH
+Store the server's process ID in the given file.
 @item --tls-authz=ID
 Specify the ID of a qauthz object previously created with the
 --object option. This will be used to authorize connecting users

From 6177b58431cd19fddfd025225403876b8deeeec7 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Wed, 8 May 2019 23:18:17 +0200
Subject: [PATCH 2/9] iotests.py: Add qemu_nbd_early_pipe()

qemu_nbd_pipe() currently unconditionally reads qemu-nbd's output.  That
is not ideal because qemu-nbd may keep stderr open after the parent
process has exited.

Currently, the only user of qemu_nbd_pipe() is 147, which discards the
whole output if the parent process returned success and only evaluates
it on error.  Therefore, we can replace qemu_nbd_pipe() by
qemu_nbd_early_pipe() that does the same: Discard the output on success,
and return it on error.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20190508211820.17851-3-mreitz@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 tests/qemu-iotests/147        | 4 ++--
 tests/qemu-iotests/iotests.py | 9 ++++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/147 b/tests/qemu-iotests/147
index 82513279b0..2d84fddb01 100755
--- a/tests/qemu-iotests/147
+++ b/tests/qemu-iotests/147
@@ -24,7 +24,7 @@ import socket
 import stat
 import time
 import iotests
-from iotests import cachemode, imgfmt, qemu_img, qemu_nbd, qemu_nbd_pipe
+from iotests import cachemode, imgfmt, qemu_img, qemu_nbd, qemu_nbd_early_pipe
 
 NBD_PORT_START      = 32768
 NBD_PORT_END        = NBD_PORT_START + 1024
@@ -93,7 +93,7 @@ class QemuNBD(NBDBlockdevAddBase):
             pass
 
     def _try_server_up(self, *args):
-        status, msg = qemu_nbd_pipe('-f', imgfmt, test_img, *args)
+        status, msg = qemu_nbd_early_pipe('-f', imgfmt, test_img, *args)
         if status == 0:
             return True
         if 'Address already in use' in msg:
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 6bcddd8870..f11482f3dc 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -209,9 +209,9 @@ def qemu_nbd(*args):
     '''Run qemu-nbd in daemon mode and return the parent's exit code'''
     return subprocess.call(qemu_nbd_args + ['--fork'] + list(args))
 
-def qemu_nbd_pipe(*args):
+def qemu_nbd_early_pipe(*args):
     '''Run qemu-nbd in daemon mode and return both the parent's exit code
-       and its output'''
+       and its output in case of an error'''
     subp = subprocess.Popen(qemu_nbd_args + ['--fork'] + list(args),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
@@ -221,7 +221,10 @@ def qemu_nbd_pipe(*args):
         sys.stderr.write('qemu-nbd received signal %i: %s\n' %
                          (-exitcode,
                           ' '.join(qemu_nbd_args + ['--fork'] + list(args))))
-    return exitcode, subp.communicate()[0]
+    if exitcode == 0:
+        return exitcode, ''
+    else:
+        return exitcode, subp.communicate()[0]
 
 def compare_images(img1, img2, fmt1=imgfmt, fmt2=imgfmt):
     '''Return True if two image files are identical'''

From e6df58a5578fee7a50bbf36f4a50a2781cff855d Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Wed, 8 May 2019 23:18:18 +0200
Subject: [PATCH 3/9] qemu-nbd: Do not close stderr

We kept old_stderr specifically so we could keep emitting error message
on stderr.  However, qemu_daemon() closes stderr.  Therefore, we need to
dup() stderr to old_stderr before invoking qemu_daemon().

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20190508211820.17851-4-mreitz@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 qemu-nbd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index 99377a3f14..a8cb39e510 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -1004,10 +1004,11 @@ int main(int argc, char **argv)
             exit(EXIT_FAILURE);
         } else if (pid == 0) {
             close(stderr_fd[0]);
+
+            old_stderr = dup(STDERR_FILENO);
             ret = qemu_daemon(1, 0);
 
             /* Temporarily redirect stderr to the parent's pipe...  */
-            old_stderr = dup(STDERR_FILENO);
             dup2(stderr_fd[1], STDERR_FILENO);
             if (ret < 0) {
                 error_report("Failed to daemonize: %s", strerror(errno));

From 4718360a58cb0a02a68d3b0e52a4711d7c398357 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Wed, 8 May 2019 23:18:19 +0200
Subject: [PATCH 4/9] iotests: Use qemu-nbd's --pid-file

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20190508211820.17851-5-mreitz@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 tests/qemu-iotests/common.rc | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 93f87389b6..5502c3da2f 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -105,10 +105,8 @@ _qemu_io_wrapper()
 
 _qemu_nbd_wrapper()
 {
-    (
-        echo $BASHPID > "${QEMU_TEST_DIR}/qemu-nbd.pid"
-        exec "$QEMU_NBD_PROG" $QEMU_NBD_OPTIONS "$@"
-    )
+    "$QEMU_NBD_PROG" --pid-file="${QEMU_TEST_DIR}/qemu-nbd.pid" \
+                     $QEMU_NBD_OPTIONS "$@"
 }
 
 _qemu_vxhs_wrapper()

From b28f582c2acaca26b66262d75cc5a0bd2764482c Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Wed, 8 May 2019 23:18:20 +0200
Subject: [PATCH 5/9] iotests: Let 233 run concurrently

common.nbd's nbd_server_set_tcp_port() tries to find a free port, and
then uses it for the whole test run.  However, this is racy because even
if the port was free at the beginning, there is no guarantee it will
continue to be available.  Therefore, 233 currently cannot reliably be
run concurrently with other NBD TCP tests.

This patch addresses the problem by dropping nbd_server_set_tcp_port(),
and instead finding a new port every time nbd_server_start_tcp_socket()
is invoked.  For this, we run qemu-nbd with --fork and on error evaluate
the output to see whether it contains "Address already in use".  If so,
we try the next port.

On success, we still want to continually redirect the output from
qemu-nbd to stderr.  To achieve both, we redirect qemu-nbd's stderr to a
FIFO that we then open in bash.  If the parent process exits with status
0 (which means that the server has started successfully), we launch a
background cat process that copies the FIFO to stderr.  On failure, we
read the whole content into a variable and then evaluate it.

While at it, use --fork in nbd_server_start_unix_socket(), too.  Doing
so allows us to drop nbd_server_wait_for_*_socket().

Note that the reason common.nbd did not use --fork before is that
qemu-nbd did not have --pid-file.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20190508211820.17851-6-mreitz@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 tests/qemu-iotests/233        |   1 -
 tests/qemu-iotests/common.nbd | 116 ++++++++++++++++------------------
 2 files changed, 53 insertions(+), 64 deletions(-)

diff --git a/tests/qemu-iotests/233 b/tests/qemu-iotests/233
index 057cad2044..a5c17c3963 100755
--- a/tests/qemu-iotests/233
+++ b/tests/qemu-iotests/233
@@ -49,7 +49,6 @@ _supported_proto file
 # If porting to non-Linux, consider using socat instead of ss in common.nbd
 _require_command QEMU_NBD
 
-nbd_server_set_tcp_port
 tls_x509_init
 
 echo
diff --git a/tests/qemu-iotests/common.nbd b/tests/qemu-iotests/common.nbd
index 25fc9ffaa4..24b01b60aa 100644
--- a/tests/qemu-iotests/common.nbd
+++ b/tests/qemu-iotests/common.nbd
@@ -22,6 +22,11 @@
 nbd_unix_socket="${TEST_DIR}/qemu-nbd.sock"
 nbd_tcp_addr="127.0.0.1"
 nbd_pid_file="${TEST_DIR}/qemu-nbd.pid"
+nbd_stderr_fifo="${TEST_DIR}/qemu-nbd.fifo"
+
+# If bash version is >= 4.1, this will be overwritten by a dynamically
+# assigned file descriptor value.
+nbd_fifo_fd=10
 
 nbd_server_stop()
 {
@@ -33,77 +38,62 @@ nbd_server_stop()
             kill "$NBD_PID"
         fi
     fi
-    rm -f "$nbd_unix_socket"
-}
-
-nbd_server_wait_for_unix_socket()
-{
-    pid=$1
-
-    for ((i = 0; i < 300; i++))
-    do
-        if [ -r "$nbd_unix_socket" ]; then
-            return
-        fi
-        kill -s 0 $pid 2>/dev/null
-        if test $? != 0
-        then
-            echo "qemu-nbd unexpectedly quit"
-            exit 1
-        fi
-        sleep 0.1
-    done
-    echo "Failed in check of unix socket created by qemu-nbd"
-    exit 1
+    rm -f "$nbd_unix_socket" "$nbd_stderr_fifo"
 }
 
 nbd_server_start_unix_socket()
 {
     nbd_server_stop
-    $QEMU_NBD -v -t -k "$nbd_unix_socket" "$@" &
-    nbd_server_wait_for_unix_socket $!
-}
-
-nbd_server_set_tcp_port()
-{
-    (ss --help) >/dev/null 2>&1 || _notrun "ss utility not found, skipping test"
-
-    for ((port = 10809; port <= 10909; port++))
-    do
-        if ! ss -tln | grep -sqE ":$port\b"; then
-            nbd_tcp_port=$port
-            return
-        fi
-    done
-
-    echo "Cannot find free TCP port for nbd in range 10809-10909"
-    exit 1
-}
-
-nbd_server_wait_for_tcp_socket()
-{
-    pid=$1
-
-    for ((i = 0; i < 300; i++))
-    do
-        if ss -tln | grep -sqE ":$nbd_tcp_port\b"; then
-            return
-        fi
-        kill -s 0 $pid 2>/dev/null
-        if test $? != 0
-        then
-            echo "qemu-nbd unexpectedly quit"
-            exit 1
-        fi
-        sleep 0.1
-    done
-    echo "Failed in check of TCP socket created by qemu-nbd"
-    exit 1
+    $QEMU_NBD -v -t -k "$nbd_unix_socket" --fork "$@"
 }
 
 nbd_server_start_tcp_socket()
 {
     nbd_server_stop
-    $QEMU_NBD -v -t -b $nbd_tcp_addr -p $nbd_tcp_port "$@" &
-    nbd_server_wait_for_tcp_socket $!
+
+    mkfifo "$nbd_stderr_fifo"
+    for ((port = 10809; port <= 10909; port++))
+    do
+        # Redirect stderr to FIFO, so we can later decide whether we
+        # want to read it or to redirect it to our stderr, depending
+        # on whether the command fails or not
+        $QEMU_NBD -v -t -b $nbd_tcp_addr -p $port --fork "$@" \
+            2> "$nbd_stderr_fifo" &
+
+        # Taken from common.qemu
+        if [[ "${BASH_VERSINFO[0]}" -ge "5" ||
+            ("${BASH_VERSINFO[0]}" -ge "4" && "${BASH_VERSINFO[1]}" -ge "1") ]]
+        then
+            exec {nbd_fifo_fd}<"$nbd_stderr_fifo"
+        else
+            let _nbd_fifo_fd++
+            eval "exec ${_nbd_fifo_fd}<'$nbd_stderr_fifo'"
+        fi
+        wait $!
+
+        if test $? == 0
+        then
+            # Success, redirect qemu-nbd's stderr to our stderr
+            nbd_tcp_port=$port
+            (cat <&$nbd_fifo_fd >&2) &
+            eval "exec $nbd_fifo_fd>&-"
+            return
+        fi
+
+        # Failure, read the output
+        output=$(cat <&$nbd_fifo_fd)
+        eval "exec $nbd_fifo_fd>&-"
+
+        if ! echo "$output" | grep -q "Address already in use"
+        then
+            # Unknown error, print it
+            echo "$output" >&2
+            rm -f "$nbd_stderr_fifo"
+            exit 1
+        fi
+    done
+
+    echo "Cannot find free TCP port for nbd in range 10809-10909"
+    rm -f "$nbd_stderr_fifo"
+    exit 1
 }

From 416e34bd04bea32a6e68c103dbbb4210a1f1d0dd Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Fri, 10 May 2019 10:17:35 -0500
Subject: [PATCH 6/9] nbd/server: Nicer spelling of max BLOCK_STATUS reply
 length

Commit 3d068aff (3.0) introduced NBD_MAX_BITMAP_EXTENTS as a limit on
how large we would allow a reply to NBD_CMD_BLOCK_STATUS to grow when
it is visiting a qemu:dirty-bitmap: context.  Later, commit fb7afc79
(3.1) reused the constant to limit base:allocation context replies,
although the name is now less appropriate in that situation.

Rename things, and improve the macro to use units.h for better
legibility. Then reformat the comment to comply with checkpatch rules
added in the meantime. No semantic change.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20190510151735.29687-1-eblake@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 nbd/server.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index aeca3893fe..10faedcfc5 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -21,15 +21,18 @@
 #include "qapi/error.h"
 #include "trace.h"
 #include "nbd-internal.h"
+#include "qemu/units.h"
 
 #define NBD_META_ID_BASE_ALLOCATION 0
 #define NBD_META_ID_DIRTY_BITMAP 1
 
-/* NBD_MAX_BITMAP_EXTENTS: 1 mb of extents data. An empirical
+/*
+ * NBD_MAX_BLOCK_STATUS_EXTENTS: 1 MiB of extents data. An empirical
  * constant. If an increase is needed, note that the NBD protocol
  * recommends no larger than 32 mb, so that the client won't consider
- * the reply as a denial of service attack. */
-#define NBD_MAX_BITMAP_EXTENTS (0x100000 / 8)
+ * the reply as a denial of service attack.
+ */
+#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
 
 static int system_errno_to_nbd_errno(int err)
 {
@@ -1960,7 +1963,7 @@ static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
                                     Error **errp)
 {
     int ret;
-    unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS;
+    unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
     NBDExtent *extents = g_new(NBDExtent, nb_extents);
     uint64_t final_length = length;
 
@@ -2045,7 +2048,7 @@ static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
                               uint32_t context_id, Error **errp)
 {
     int ret;
-    unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS;
+    unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
     NBDExtent *extents = g_new(NBDExtent, nb_extents);
     uint64_t final_length = length;
 

From 0a93b359db270788b81c1dba6ce868877381fa94 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Tue, 11 Jun 2019 13:27:18 +0300
Subject: [PATCH 7/9] block/nbd-client: drop stale logout

Drop one on failure path (we have errp) and turn two others into trace
points.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20190611102720.86114-2-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 block/nbd-client.c | 6 +++---
 block/nbd-client.h | 9 ---------
 block/trace-events | 2 ++
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 790ecc1ee1..f89a67c23b 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -1136,7 +1136,7 @@ static int nbd_client_connect(BlockDriverState *bs,
     }
 
     /* NBD handshake */
-    logout("session init %s\n", export);
+    trace_nbd_client_connect(export);
     qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
 
     client->info.request_sizes = true;
@@ -1149,7 +1149,6 @@ static int nbd_client_connect(BlockDriverState *bs,
     g_free(client->info.x_dirty_bitmap);
     g_free(client->info.name);
     if (ret < 0) {
-        logout("Failed to negotiate with the NBD server\n");
         object_unref(OBJECT(sioc));
         return ret;
     }
@@ -1187,7 +1186,8 @@ static int nbd_client_connect(BlockDriverState *bs,
     bdrv_inc_in_flight(bs);
     nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
-    logout("Established connection with NBD server\n");
+    trace_nbd_client_connect_success(export);
+
     return 0;
 
  fail:
diff --git a/block/nbd-client.h b/block/nbd-client.h
index 4587053114..570538f4c8 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -5,15 +5,6 @@
 #include "block/block_int.h"
 #include "io/channel-socket.h"
 
-/* #define DEBUG_NBD */
-
-#if defined(DEBUG_NBD)
-#define logout(fmt, ...) \
-    fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
-#else
-#define logout(fmt, ...) ((void)0)
-#endif
-
 #define MAX_NBD_REQUESTS    16
 
 typedef struct {
diff --git a/block/trace-events b/block/trace-events
index eab51497fc..01fa5eb081 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -165,6 +165,8 @@ nbd_parse_blockstatus_compliance(const char *err) "ignoring extra data from non-
 nbd_structured_read_compliance(const char *type) "server sent non-compliant unaligned read %s chunk"
 nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s"
 nbd_co_request_fail(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s"
+nbd_client_connect(const char *export_name) "export '%s'"
+nbd_client_connect_success(const char *export_name) "export '%s'"
 
 # ssh.c
 ssh_restart_coroutine(void *co) "co=%p"

From 86f8cdf3db8bc263ae08885e04ce96c089d1cac8 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Tue, 11 Jun 2019 13:27:19 +0300
Subject: [PATCH 8/9] block/nbd: merge nbd-client.* to nbd.c

No reason for keeping driver handlers realization separate from driver
structure. We can get rid of extra header file.

While being here, fix comments style, restore forgotten comments for
NBD_FOREACH_REPLY_CHUNK and nbd_reply_chunk_iter_receive, remove extra
includes.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20190611102720.86114-3-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 block/Makefile.objs |    2 +-
 block/nbd-client.c  | 1226 -----------------------------------------
 block/nbd-client.h  |   62 ---
 block/nbd.c         | 1287 +++++++++++++++++++++++++++++++++++++++++--
 block/trace-events  |    2 +-
 5 files changed, 1256 insertions(+), 1323 deletions(-)
 delete mode 100644 block/nbd-client.c
 delete mode 100644 block/nbd-client.h

diff --git a/block/Makefile.objs b/block/Makefile.objs
index ae11605c9f..dbd1522722 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -22,7 +22,7 @@ block-obj-y += null.o mirror.o commit.o io.o create.o
 block-obj-y += throttle-groups.o
 block-obj-$(CONFIG_LINUX) += nvme.o
 
-block-obj-y += nbd.o nbd-client.o
+block-obj-y += nbd.o
 block-obj-$(CONFIG_SHEEPDOG) += sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(if $(CONFIG_LIBISCSI),y,n) += iscsi-opts.o
diff --git a/block/nbd-client.c b/block/nbd-client.c
deleted file mode 100644
index f89a67c23b..0000000000
--- a/block/nbd-client.c
+++ /dev/null
@@ -1,1226 +0,0 @@
-/*
- * QEMU Block driver for  NBD
- *
- * Copyright (C) 2016 Red Hat, Inc.
- * Copyright (C) 2008 Bull S.A.S.
- *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
- *
- * Some parts:
- *    Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-
-#include "trace.h"
-#include "qapi/error.h"
-#include "nbd-client.h"
-
-#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs))
-#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ (uint64_t)(intptr_t)(bs))
-
-static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
-{
-    int i;
-
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        NBDClientRequest *req = &s->requests[i];
-
-        if (req->coroutine && req->receiving) {
-            aio_co_wake(req->coroutine);
-        }
-    }
-}
-
-static void nbd_teardown_connection(BlockDriverState *bs)
-{
-    NBDClientSession *client = nbd_get_client_session(bs);
-
-    assert(client->ioc);
-
-    /* finish any pending coroutines */
-    qio_channel_shutdown(client->ioc,
-                         QIO_CHANNEL_SHUTDOWN_BOTH,
-                         NULL);
-    BDRV_POLL_WHILE(bs, client->connection_co);
-
-    nbd_client_detach_aio_context(bs);
-    object_unref(OBJECT(client->sioc));
-    client->sioc = NULL;
-    object_unref(OBJECT(client->ioc));
-    client->ioc = NULL;
-}
-
-static coroutine_fn void nbd_connection_entry(void *opaque)
-{
-    NBDClientSession *s = opaque;
-    uint64_t i;
-    int ret = 0;
-    Error *local_err = NULL;
-
-    while (!s->quit) {
-        /*
-         * The NBD client can only really be considered idle when it has
-         * yielded from qio_channel_readv_all_eof(), waiting for data. This is
-         * the point where the additional scheduled coroutine entry happens
-         * after nbd_client_attach_aio_context().
-         *
-         * Therefore we keep an additional in_flight reference all the time and
-         * only drop it temporarily here.
-         */
-        assert(s->reply.handle == 0);
-        ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, &local_err);
-
-        if (local_err) {
-            trace_nbd_read_reply_entry_fail(ret, error_get_pretty(local_err));
-            error_free(local_err);
-        }
-        if (ret <= 0) {
-            break;
-        }
-
-        /* There's no need for a mutex on the receive side, because the
-         * handler acts as a synchronization point and ensures that only
-         * one coroutine is called until the reply finishes.
-         */
-        i = HANDLE_TO_INDEX(s, s->reply.handle);
-        if (i >= MAX_NBD_REQUESTS ||
-            !s->requests[i].coroutine ||
-            !s->requests[i].receiving ||
-            (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply))
-        {
-            break;
-        }
-
-        /* We're woken up again by the request itself.  Note that there
-         * is no race between yielding and reentering connection_co.  This
-         * is because:
-         *
-         * - if the request runs on the same AioContext, it is only
-         *   entered after we yield
-         *
-         * - if the request runs on a different AioContext, reentering
-         *   connection_co happens through a bottom half, which can only
-         *   run after we yield.
-         */
-        aio_co_wake(s->requests[i].coroutine);
-        qemu_coroutine_yield();
-    }
-
-    s->quit = true;
-    nbd_recv_coroutines_wake_all(s);
-    bdrv_dec_in_flight(s->bs);
-
-    s->connection_co = NULL;
-    aio_wait_kick();
-}
-
-static int nbd_co_send_request(BlockDriverState *bs,
-                               NBDRequest *request,
-                               QEMUIOVector *qiov)
-{
-    NBDClientSession *s = nbd_get_client_session(bs);
-    int rc, i;
-
-    qemu_co_mutex_lock(&s->send_mutex);
-    while (s->in_flight == MAX_NBD_REQUESTS) {
-        qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
-    }
-    s->in_flight++;
-
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (s->requests[i].coroutine == NULL) {
-            break;
-        }
-    }
-
-    g_assert(qemu_in_coroutine());
-    assert(i < MAX_NBD_REQUESTS);
-
-    s->requests[i].coroutine = qemu_coroutine_self();
-    s->requests[i].offset = request->from;
-    s->requests[i].receiving = false;
-
-    request->handle = INDEX_TO_HANDLE(s, i);
-
-    if (s->quit) {
-        rc = -EIO;
-        goto err;
-    }
-    assert(s->ioc);
-
-    if (qiov) {
-        qio_channel_set_cork(s->ioc, true);
-        rc = nbd_send_request(s->ioc, request);
-        if (rc >= 0 && !s->quit) {
-            if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
-                                       NULL) < 0) {
-                rc = -EIO;
-            }
-        } else if (rc >= 0) {
-            rc = -EIO;
-        }
-        qio_channel_set_cork(s->ioc, false);
-    } else {
-        rc = nbd_send_request(s->ioc, request);
-    }
-
-err:
-    if (rc < 0) {
-        s->quit = true;
-        s->requests[i].coroutine = NULL;
-        s->in_flight--;
-        qemu_co_queue_next(&s->free_sema);
-    }
-    qemu_co_mutex_unlock(&s->send_mutex);
-    return rc;
-}
-
-static inline uint16_t payload_advance16(uint8_t **payload)
-{
-    *payload += 2;
-    return lduw_be_p(*payload - 2);
-}
-
-static inline uint32_t payload_advance32(uint8_t **payload)
-{
-    *payload += 4;
-    return ldl_be_p(*payload - 4);
-}
-
-static inline uint64_t payload_advance64(uint8_t **payload)
-{
-    *payload += 8;
-    return ldq_be_p(*payload - 8);
-}
-
-static int nbd_parse_offset_hole_payload(NBDClientSession *client,
-                                         NBDStructuredReplyChunk *chunk,
-                                         uint8_t *payload, uint64_t orig_offset,
-                                         QEMUIOVector *qiov, Error **errp)
-{
-    uint64_t offset;
-    uint32_t hole_size;
-
-    if (chunk->length != sizeof(offset) + sizeof(hole_size)) {
-        error_setg(errp, "Protocol error: invalid payload for "
-                         "NBD_REPLY_TYPE_OFFSET_HOLE");
-        return -EINVAL;
-    }
-
-    offset = payload_advance64(&payload);
-    hole_size = payload_advance32(&payload);
-
-    if (!hole_size || offset < orig_offset || hole_size > qiov->size ||
-        offset > orig_offset + qiov->size - hole_size) {
-        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
-                         " region");
-        return -EINVAL;
-    }
-    if (client->info.min_block &&
-        !QEMU_IS_ALIGNED(hole_size, client->info.min_block)) {
-        trace_nbd_structured_read_compliance("hole");
-    }
-
-    qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size);
-
-    return 0;
-}
-
-/* nbd_parse_blockstatus_payload
- * Based on our request, we expect only one extent in reply, for the
- * base:allocation context.
- */
-static int nbd_parse_blockstatus_payload(NBDClientSession *client,
-                                         NBDStructuredReplyChunk *chunk,
-                                         uint8_t *payload, uint64_t orig_length,
-                                         NBDExtent *extent, Error **errp)
-{
-    uint32_t context_id;
-
-    /* The server succeeded, so it must have sent [at least] one extent */
-    if (chunk->length < sizeof(context_id) + sizeof(*extent)) {
-        error_setg(errp, "Protocol error: invalid payload for "
-                         "NBD_REPLY_TYPE_BLOCK_STATUS");
-        return -EINVAL;
-    }
-
-    context_id = payload_advance32(&payload);
-    if (client->info.context_id != context_id) {
-        error_setg(errp, "Protocol error: unexpected context id %d for "
-                         "NBD_REPLY_TYPE_BLOCK_STATUS, when negotiated context "
-                         "id is %d", context_id,
-                         client->info.context_id);
-        return -EINVAL;
-    }
-
-    extent->length = payload_advance32(&payload);
-    extent->flags = payload_advance32(&payload);
-
-    if (extent->length == 0) {
-        error_setg(errp, "Protocol error: server sent status chunk with "
-                   "zero length");
-        return -EINVAL;
-    }
-
-    /*
-     * A server sending unaligned block status is in violation of the
-     * protocol, but as qemu-nbd 3.1 is such a server (at least for
-     * POSIX files that are not a multiple of 512 bytes, since qemu
-     * rounds files up to 512-byte multiples but lseek(SEEK_HOLE)
-     * still sees an implicit hole beyond the real EOF), it's nicer to
-     * work around the misbehaving server. If the request included
-     * more than the final unaligned block, truncate it back to an
-     * aligned result; if the request was only the final block, round
-     * up to the full block and change the status to fully-allocated
-     * (always a safe status, even if it loses information).
-     */
-    if (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
-                                                   client->info.min_block)) {
-        trace_nbd_parse_blockstatus_compliance("extent length is unaligned");
-        if (extent->length > client->info.min_block) {
-            extent->length = QEMU_ALIGN_DOWN(extent->length,
-                                             client->info.min_block);
-        } else {
-            extent->length = client->info.min_block;
-            extent->flags = 0;
-        }
-    }
-
-    /*
-     * We used NBD_CMD_FLAG_REQ_ONE, so the server should not have
-     * sent us any more than one extent, nor should it have included
-     * status beyond our request in that extent. However, it's easy
-     * enough to ignore the server's noncompliance without killing the
-     * connection; just ignore trailing extents, and clamp things to
-     * the length of our request.
-     */
-    if (chunk->length > sizeof(context_id) + sizeof(*extent)) {
-        trace_nbd_parse_blockstatus_compliance("more than one extent");
-    }
-    if (extent->length > orig_length) {
-        extent->length = orig_length;
-        trace_nbd_parse_blockstatus_compliance("extent length too large");
-    }
-
-    return 0;
-}
-
-/* nbd_parse_error_payload
- * on success @errp contains message describing nbd error reply
- */
-static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk,
-                                   uint8_t *payload, int *request_ret,
-                                   Error **errp)
-{
-    uint32_t error;
-    uint16_t message_size;
-
-    assert(chunk->type & (1 << 15));
-
-    if (chunk->length < sizeof(error) + sizeof(message_size)) {
-        error_setg(errp,
-                   "Protocol error: invalid payload for structured error");
-        return -EINVAL;
-    }
-
-    error = nbd_errno_to_system_errno(payload_advance32(&payload));
-    if (error == 0) {
-        error_setg(errp, "Protocol error: server sent structured error chunk "
-                         "with error = 0");
-        return -EINVAL;
-    }
-
-    *request_ret = -error;
-    message_size = payload_advance16(&payload);
-
-    if (message_size > chunk->length - sizeof(error) - sizeof(message_size)) {
-        error_setg(errp, "Protocol error: server sent structured error chunk "
-                         "with incorrect message size");
-        return -EINVAL;
-    }
-
-    /* TODO: Add a trace point to mention the server complaint */
-
-    /* TODO handle ERROR_OFFSET */
-
-    return 0;
-}
-
-static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
-                                              uint64_t orig_offset,
-                                              QEMUIOVector *qiov, Error **errp)
-{
-    QEMUIOVector sub_qiov;
-    uint64_t offset;
-    size_t data_size;
-    int ret;
-    NBDStructuredReplyChunk *chunk = &s->reply.structured;
-
-    assert(nbd_reply_is_structured(&s->reply));
-
-    /* The NBD spec requires at least one byte of payload */
-    if (chunk->length <= sizeof(offset)) {
-        error_setg(errp, "Protocol error: invalid payload for "
-                         "NBD_REPLY_TYPE_OFFSET_DATA");
-        return -EINVAL;
-    }
-
-    if (nbd_read64(s->ioc, &offset, "OFFSET_DATA offset", errp) < 0) {
-        return -EIO;
-    }
-
-    data_size = chunk->length - sizeof(offset);
-    assert(data_size);
-    if (offset < orig_offset || data_size > qiov->size ||
-        offset > orig_offset + qiov->size - data_size) {
-        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
-                         " region");
-        return -EINVAL;
-    }
-    if (s->info.min_block && !QEMU_IS_ALIGNED(data_size, s->info.min_block)) {
-        trace_nbd_structured_read_compliance("data");
-    }
-
-    qemu_iovec_init(&sub_qiov, qiov->niov);
-    qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size);
-    ret = qio_channel_readv_all(s->ioc, sub_qiov.iov, sub_qiov.niov, errp);
-    qemu_iovec_destroy(&sub_qiov);
-
-    return ret < 0 ? -EIO : 0;
-}
-
-#define NBD_MAX_MALLOC_PAYLOAD 1000
-/* nbd_co_receive_structured_payload
- */
-static coroutine_fn int nbd_co_receive_structured_payload(
-        NBDClientSession *s, void **payload, Error **errp)
-{
-    int ret;
-    uint32_t len;
-
-    assert(nbd_reply_is_structured(&s->reply));
-
-    len = s->reply.structured.length;
-
-    if (len == 0) {
-        return 0;
-    }
-
-    if (payload == NULL) {
-        error_setg(errp, "Unexpected structured payload");
-        return -EINVAL;
-    }
-
-    if (len > NBD_MAX_MALLOC_PAYLOAD) {
-        error_setg(errp, "Payload too large");
-        return -EINVAL;
-    }
-
-    *payload = g_new(char, len);
-    ret = nbd_read(s->ioc, *payload, len, "structured payload", errp);
-    if (ret < 0) {
-        g_free(*payload);
-        *payload = NULL;
-        return ret;
-    }
-
-    return 0;
-}
-
-/* nbd_co_do_receive_one_chunk
- * for simple reply:
- *   set request_ret to received reply error
- *   if qiov is not NULL: read payload to @qiov
- * for structured reply chunk:
- *   if error chunk: read payload, set @request_ret, do not set @payload
- *   else if offset_data chunk: read payload data to @qiov, do not set @payload
- *   else: read payload to @payload
- *
- * If function fails, @errp contains corresponding error message, and the
- * connection with the server is suspect.  If it returns 0, then the
- * transaction succeeded (although @request_ret may be a negative errno
- * corresponding to the server's error reply), and errp is unchanged.
- */
-static coroutine_fn int nbd_co_do_receive_one_chunk(
-        NBDClientSession *s, uint64_t handle, bool only_structured,
-        int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp)
-{
-    int ret;
-    int i = HANDLE_TO_INDEX(s, handle);
-    void *local_payload = NULL;
-    NBDStructuredReplyChunk *chunk;
-
-    if (payload) {
-        *payload = NULL;
-    }
-    *request_ret = 0;
-
-    /* Wait until we're woken up by nbd_connection_entry.  */
-    s->requests[i].receiving = true;
-    qemu_coroutine_yield();
-    s->requests[i].receiving = false;
-    if (s->quit) {
-        error_setg(errp, "Connection closed");
-        return -EIO;
-    }
-    assert(s->ioc);
-
-    assert(s->reply.handle == handle);
-
-    if (nbd_reply_is_simple(&s->reply)) {
-        if (only_structured) {
-            error_setg(errp, "Protocol error: simple reply when structured "
-                             "reply chunk was expected");
-            return -EINVAL;
-        }
-
-        *request_ret = -nbd_errno_to_system_errno(s->reply.simple.error);
-        if (*request_ret < 0 || !qiov) {
-            return 0;
-        }
-
-        return qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
-                                     errp) < 0 ? -EIO : 0;
-    }
-
-    /* handle structured reply chunk */
-    assert(s->info.structured_reply);
-    chunk = &s->reply.structured;
-
-    if (chunk->type == NBD_REPLY_TYPE_NONE) {
-        if (!(chunk->flags & NBD_REPLY_FLAG_DONE)) {
-            error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk without"
-                       " NBD_REPLY_FLAG_DONE flag set");
-            return -EINVAL;
-        }
-        if (chunk->length) {
-            error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk with"
-                       " nonzero length");
-            return -EINVAL;
-        }
-        return 0;
-    }
-
-    if (chunk->type == NBD_REPLY_TYPE_OFFSET_DATA) {
-        if (!qiov) {
-            error_setg(errp, "Unexpected NBD_REPLY_TYPE_OFFSET_DATA chunk");
-            return -EINVAL;
-        }
-
-        return nbd_co_receive_offset_data_payload(s, s->requests[i].offset,
-                                                  qiov, errp);
-    }
-
-    if (nbd_reply_type_is_error(chunk->type)) {
-        payload = &local_payload;
-    }
-
-    ret = nbd_co_receive_structured_payload(s, payload, errp);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (nbd_reply_type_is_error(chunk->type)) {
-        ret = nbd_parse_error_payload(chunk, local_payload, request_ret, errp);
-        g_free(local_payload);
-        return ret;
-    }
-
-    return 0;
-}
-
-/* nbd_co_receive_one_chunk
- * Read reply, wake up connection_co and set s->quit if needed.
- * Return value is a fatal error code or normal nbd reply error code
- */
-static coroutine_fn int nbd_co_receive_one_chunk(
-        NBDClientSession *s, uint64_t handle, bool only_structured,
-        int *request_ret, QEMUIOVector *qiov, NBDReply *reply, void **payload,
-        Error **errp)
-{
-    int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured,
-                                          request_ret, qiov, payload, errp);
-
-    if (ret < 0) {
-        s->quit = true;
-    } else {
-        /* For assert at loop start in nbd_connection_entry */
-        if (reply) {
-            *reply = s->reply;
-        }
-        s->reply.handle = 0;
-    }
-
-    if (s->connection_co) {
-        aio_co_wake(s->connection_co);
-    }
-
-    return ret;
-}
-
-typedef struct NBDReplyChunkIter {
-    int ret;
-    int request_ret;
-    Error *err;
-    bool done, only_structured;
-} NBDReplyChunkIter;
-
-static void nbd_iter_channel_error(NBDReplyChunkIter *iter,
-                                   int ret, Error **local_err)
-{
-    assert(ret < 0);
-
-    if (!iter->ret) {
-        iter->ret = ret;
-        error_propagate(&iter->err, *local_err);
-    } else {
-        error_free(*local_err);
-    }
-
-    *local_err = NULL;
-}
-
-static void nbd_iter_request_error(NBDReplyChunkIter *iter, int ret)
-{
-    assert(ret < 0);
-
-    if (!iter->request_ret) {
-        iter->request_ret = ret;
-    }
-}
-
-/* NBD_FOREACH_REPLY_CHUNK
- */
-#define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \
-                                qiov, reply, payload) \
-    for (iter = (NBDReplyChunkIter) { .only_structured = structured }; \
-         nbd_reply_chunk_iter_receive(s, &iter, handle, qiov, reply, payload);)
-
-/* nbd_reply_chunk_iter_receive
- */
-static bool nbd_reply_chunk_iter_receive(NBDClientSession *s,
-                                         NBDReplyChunkIter *iter,
-                                         uint64_t handle,
-                                         QEMUIOVector *qiov, NBDReply *reply,
-                                         void **payload)
-{
-    int ret, request_ret;
-    NBDReply local_reply;
-    NBDStructuredReplyChunk *chunk;
-    Error *local_err = NULL;
-    if (s->quit) {
-        error_setg(&local_err, "Connection closed");
-        nbd_iter_channel_error(iter, -EIO, &local_err);
-        goto break_loop;
-    }
-
-    if (iter->done) {
-        /* Previous iteration was last. */
-        goto break_loop;
-    }
-
-    if (reply == NULL) {
-        reply = &local_reply;
-    }
-
-    ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured,
-                                   &request_ret, qiov, reply, payload,
-                                   &local_err);
-    if (ret < 0) {
-        nbd_iter_channel_error(iter, ret, &local_err);
-    } else if (request_ret < 0) {
-        nbd_iter_request_error(iter, request_ret);
-    }
-
-    /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
-    if (nbd_reply_is_simple(reply) || s->quit) {
-        goto break_loop;
-    }
-
-    chunk = &reply->structured;
-    iter->only_structured = true;
-
-    if (chunk->type == NBD_REPLY_TYPE_NONE) {
-        /* NBD_REPLY_FLAG_DONE is already checked in nbd_co_receive_one_chunk */
-        assert(chunk->flags & NBD_REPLY_FLAG_DONE);
-        goto break_loop;
-    }
-
-    if (chunk->flags & NBD_REPLY_FLAG_DONE) {
-        /* This iteration is last. */
-        iter->done = true;
-    }
-
-    /* Execute the loop body */
-    return true;
-
-break_loop:
-    s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL;
-
-    qemu_co_mutex_lock(&s->send_mutex);
-    s->in_flight--;
-    qemu_co_queue_next(&s->free_sema);
-    qemu_co_mutex_unlock(&s->send_mutex);
-
-    return false;
-}
-
-static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
-                                      int *request_ret, Error **errp)
-{
-    NBDReplyChunkIter iter;
-
-    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, NULL, NULL) {
-        /* nbd_reply_chunk_iter_receive does all the work */
-    }
-
-    error_propagate(errp, iter.err);
-    *request_ret = iter.request_ret;
-    return iter.ret;
-}
-
-static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
-                                        uint64_t offset, QEMUIOVector *qiov,
-                                        int *request_ret, Error **errp)
-{
-    NBDReplyChunkIter iter;
-    NBDReply reply;
-    void *payload = NULL;
-    Error *local_err = NULL;
-
-    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
-                            qiov, &reply, &payload)
-    {
-        int ret;
-        NBDStructuredReplyChunk *chunk = &reply.structured;
-
-        assert(nbd_reply_is_structured(&reply));
-
-        switch (chunk->type) {
-        case NBD_REPLY_TYPE_OFFSET_DATA:
-            /* special cased in nbd_co_receive_one_chunk, data is already
-             * in qiov */
-            break;
-        case NBD_REPLY_TYPE_OFFSET_HOLE:
-            ret = nbd_parse_offset_hole_payload(s, &reply.structured, payload,
-                                                offset, qiov, &local_err);
-            if (ret < 0) {
-                s->quit = true;
-                nbd_iter_channel_error(&iter, ret, &local_err);
-            }
-            break;
-        default:
-            if (!nbd_reply_type_is_error(chunk->type)) {
-                /* not allowed reply type */
-                s->quit = true;
-                error_setg(&local_err,
-                           "Unexpected reply type: %d (%s) for CMD_READ",
-                           chunk->type, nbd_reply_type_lookup(chunk->type));
-                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
-            }
-        }
-
-        g_free(payload);
-        payload = NULL;
-    }
-
-    error_propagate(errp, iter.err);
-    *request_ret = iter.request_ret;
-    return iter.ret;
-}
-
-static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
-                                            uint64_t handle, uint64_t length,
-                                            NBDExtent *extent,
-                                            int *request_ret, Error **errp)
-{
-    NBDReplyChunkIter iter;
-    NBDReply reply;
-    void *payload = NULL;
-    Error *local_err = NULL;
-    bool received = false;
-
-    assert(!extent->length);
-    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, &reply, &payload) {
-        int ret;
-        NBDStructuredReplyChunk *chunk = &reply.structured;
-
-        assert(nbd_reply_is_structured(&reply));
-
-        switch (chunk->type) {
-        case NBD_REPLY_TYPE_BLOCK_STATUS:
-            if (received) {
-                s->quit = true;
-                error_setg(&local_err, "Several BLOCK_STATUS chunks in reply");
-                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
-            }
-            received = true;
-
-            ret = nbd_parse_blockstatus_payload(s, &reply.structured,
-                                                payload, length, extent,
-                                                &local_err);
-            if (ret < 0) {
-                s->quit = true;
-                nbd_iter_channel_error(&iter, ret, &local_err);
-            }
-            break;
-        default:
-            if (!nbd_reply_type_is_error(chunk->type)) {
-                s->quit = true;
-                error_setg(&local_err,
-                           "Unexpected reply type: %d (%s) "
-                           "for CMD_BLOCK_STATUS",
-                           chunk->type, nbd_reply_type_lookup(chunk->type));
-                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
-            }
-        }
-
-        g_free(payload);
-        payload = NULL;
-    }
-
-    if (!extent->length && !iter.request_ret) {
-        error_setg(&local_err, "Server did not reply with any status extents");
-        nbd_iter_channel_error(&iter, -EIO, &local_err);
-    }
-
-    error_propagate(errp, iter.err);
-    *request_ret = iter.request_ret;
-    return iter.ret;
-}
-
-static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
-                          QEMUIOVector *write_qiov)
-{
-    int ret, request_ret;
-    Error *local_err = NULL;
-    NBDClientSession *client = nbd_get_client_session(bs);
-
-    assert(request->type != NBD_CMD_READ);
-    if (write_qiov) {
-        assert(request->type == NBD_CMD_WRITE);
-        assert(request->len == iov_size(write_qiov->iov, write_qiov->niov));
-    } else {
-        assert(request->type != NBD_CMD_WRITE);
-    }
-    ret = nbd_co_send_request(bs, request, write_qiov);
-    if (ret < 0) {
-        return ret;
-    }
-
-    ret = nbd_co_receive_return_code(client, request->handle,
-                                     &request_ret, &local_err);
-    if (local_err) {
-        trace_nbd_co_request_fail(request->from, request->len, request->handle,
-                                  request->flags, request->type,
-                                  nbd_cmd_lookup(request->type),
-                                  ret, error_get_pretty(local_err));
-        error_free(local_err);
-    }
-    return ret ? ret : request_ret;
-}
-
-int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
-                         uint64_t bytes, QEMUIOVector *qiov, int flags)
-{
-    int ret, request_ret;
-    Error *local_err = NULL;
-    NBDClientSession *client = nbd_get_client_session(bs);
-    NBDRequest request = {
-        .type = NBD_CMD_READ,
-        .from = offset,
-        .len = bytes,
-    };
-
-    assert(bytes <= NBD_MAX_BUFFER_SIZE);
-    assert(!flags);
-
-    if (!bytes) {
-        return 0;
-    }
-    /*
-     * Work around the fact that the block layer doesn't do
-     * byte-accurate sizing yet - if the read exceeds the server's
-     * advertised size because the block layer rounded size up, then
-     * truncate the request to the server and tail-pad with zero.
-     */
-    if (offset >= client->info.size) {
-        assert(bytes < BDRV_SECTOR_SIZE);
-        qemu_iovec_memset(qiov, 0, 0, bytes);
-        return 0;
-    }
-    if (offset + bytes > client->info.size) {
-        uint64_t slop = offset + bytes - client->info.size;
-
-        assert(slop < BDRV_SECTOR_SIZE);
-        qemu_iovec_memset(qiov, bytes - slop, 0, slop);
-        request.len -= slop;
-    }
-
-    ret = nbd_co_send_request(bs, &request, NULL);
-    if (ret < 0) {
-        return ret;
-    }
-
-    ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
-                                       &request_ret, &local_err);
-    if (local_err) {
-        trace_nbd_co_request_fail(request.from, request.len, request.handle,
-                                  request.flags, request.type,
-                                  nbd_cmd_lookup(request.type),
-                                  ret, error_get_pretty(local_err));
-        error_free(local_err);
-    }
-    return ret ? ret : request_ret;
-}
-
-int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, QEMUIOVector *qiov, int flags)
-{
-    NBDClientSession *client = nbd_get_client_session(bs);
-    NBDRequest request = {
-        .type = NBD_CMD_WRITE,
-        .from = offset,
-        .len = bytes,
-    };
-
-    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
-    if (flags & BDRV_REQ_FUA) {
-        assert(client->info.flags & NBD_FLAG_SEND_FUA);
-        request.flags |= NBD_CMD_FLAG_FUA;
-    }
-
-    assert(bytes <= NBD_MAX_BUFFER_SIZE);
-
-    if (!bytes) {
-        return 0;
-    }
-    return nbd_co_request(bs, &request, qiov);
-}
-
-int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                int bytes, BdrvRequestFlags flags)
-{
-    NBDClientSession *client = nbd_get_client_session(bs);
-    NBDRequest request = {
-        .type = NBD_CMD_WRITE_ZEROES,
-        .from = offset,
-        .len = bytes,
-    };
-
-    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
-    if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
-        return -ENOTSUP;
-    }
-
-    if (flags & BDRV_REQ_FUA) {
-        assert(client->info.flags & NBD_FLAG_SEND_FUA);
-        request.flags |= NBD_CMD_FLAG_FUA;
-    }
-    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
-        request.flags |= NBD_CMD_FLAG_NO_HOLE;
-    }
-
-    if (!bytes) {
-        return 0;
-    }
-    return nbd_co_request(bs, &request, NULL);
-}
-
-int nbd_client_co_flush(BlockDriverState *bs)
-{
-    NBDClientSession *client = nbd_get_client_session(bs);
-    NBDRequest request = { .type = NBD_CMD_FLUSH };
-
-    if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
-        return 0;
-    }
-
-    request.from = 0;
-    request.len = 0;
-
-    return nbd_co_request(bs, &request, NULL);
-}
-
-int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
-{
-    NBDClientSession *client = nbd_get_client_session(bs);
-    NBDRequest request = {
-        .type = NBD_CMD_TRIM,
-        .from = offset,
-        .len = bytes,
-    };
-
-    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
-    if (!(client->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
-        return 0;
-    }
-
-    return nbd_co_request(bs, &request, NULL);
-}
-
-int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
-                                            bool want_zero,
-                                            int64_t offset, int64_t bytes,
-                                            int64_t *pnum, int64_t *map,
-                                            BlockDriverState **file)
-{
-    int ret, request_ret;
-    NBDExtent extent = { 0 };
-    NBDClientSession *client = nbd_get_client_session(bs);
-    Error *local_err = NULL;
-
-    NBDRequest request = {
-        .type = NBD_CMD_BLOCK_STATUS,
-        .from = offset,
-        .len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
-                                                bs->bl.request_alignment),
-                                client->info.max_block),
-                   MIN(bytes, client->info.size - offset)),
-        .flags = NBD_CMD_FLAG_REQ_ONE,
-    };
-
-    if (!client->info.base_allocation) {
-        *pnum = bytes;
-        *map = offset;
-        *file = bs;
-        return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
-    }
-
-    /*
-     * Work around the fact that the block layer doesn't do
-     * byte-accurate sizing yet - if the status request exceeds the
-     * server's advertised size because the block layer rounded size
-     * up, we truncated the request to the server (above), or are
-     * called on just the hole.
-     */
-    if (offset >= client->info.size) {
-        *pnum = bytes;
-        assert(bytes < BDRV_SECTOR_SIZE);
-        /* Intentionally don't report offset_valid for the hole */
-        return BDRV_BLOCK_ZERO;
-    }
-
-    if (client->info.min_block) {
-        assert(QEMU_IS_ALIGNED(request.len, client->info.min_block));
-    }
-    ret = nbd_co_send_request(bs, &request, NULL);
-    if (ret < 0) {
-        return ret;
-    }
-
-    ret = nbd_co_receive_blockstatus_reply(client, request.handle, bytes,
-                                           &extent, &request_ret, &local_err);
-    if (local_err) {
-        trace_nbd_co_request_fail(request.from, request.len, request.handle,
-                                  request.flags, request.type,
-                                  nbd_cmd_lookup(request.type),
-                                  ret, error_get_pretty(local_err));
-        error_free(local_err);
-    }
-    if (ret < 0 || request_ret < 0) {
-        return ret ? ret : request_ret;
-    }
-
-    assert(extent.length);
-    *pnum = extent.length;
-    *map = offset;
-    *file = bs;
-    return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) |
-        (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0) |
-        BDRV_BLOCK_OFFSET_VALID;
-}
-
-void nbd_client_detach_aio_context(BlockDriverState *bs)
-{
-    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
-}
-
-static void nbd_client_attach_aio_context_bh(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-    NBDClientSession *client = nbd_get_client_session(bs);
-
-    /* The node is still drained, so we know the coroutine has yielded in
-     * nbd_read_eof(), the only place where bs->in_flight can reach 0, or it is
-     * entered for the first time. Both places are safe for entering the
-     * coroutine.*/
-    qemu_aio_coroutine_enter(bs->aio_context, client->connection_co);
-    bdrv_dec_in_flight(bs);
-}
-
-void nbd_client_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
-
-    bdrv_inc_in_flight(bs);
-
-    /* Need to wait here for the BH to run because the BH must run while the
-     * node is still drained. */
-    aio_wait_bh_oneshot(new_context, nbd_client_attach_aio_context_bh, bs);
-}
-
-void nbd_client_close(BlockDriverState *bs)
-{
-    NBDClientSession *client = nbd_get_client_session(bs);
-    NBDRequest request = { .type = NBD_CMD_DISC };
-
-    assert(client->ioc);
-
-    nbd_send_request(client->ioc, &request);
-
-    nbd_teardown_connection(bs);
-}
-
-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
-                                                  Error **errp)
-{
-    QIOChannelSocket *sioc;
-    Error *local_err = NULL;
-
-    sioc = qio_channel_socket_new();
-    qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
-
-    qio_channel_socket_connect_sync(sioc, saddr, &local_err);
-    if (local_err) {
-        object_unref(OBJECT(sioc));
-        error_propagate(errp, local_err);
-        return NULL;
-    }
-
-    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
-
-    return sioc;
-}
-
-static int nbd_client_connect(BlockDriverState *bs,
-                              SocketAddress *saddr,
-                              const char *export,
-                              QCryptoTLSCreds *tlscreds,
-                              const char *hostname,
-                              const char *x_dirty_bitmap,
-                              Error **errp)
-{
-    NBDClientSession *client = nbd_get_client_session(bs);
-    int ret;
-
-    /*
-     * establish TCP connection, return error if it fails
-     * TODO: Configurable retry-until-timeout behaviour.
-     */
-    QIOChannelSocket *sioc = nbd_establish_connection(saddr, errp);
-
-    if (!sioc) {
-        return -ECONNREFUSED;
-    }
-
-    /* NBD handshake */
-    trace_nbd_client_connect(export);
-    qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
-
-    client->info.request_sizes = true;
-    client->info.structured_reply = true;
-    client->info.base_allocation = true;
-    client->info.x_dirty_bitmap = g_strdup(x_dirty_bitmap);
-    client->info.name = g_strdup(export ?: "");
-    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), tlscreds, hostname,
-                                &client->ioc, &client->info, errp);
-    g_free(client->info.x_dirty_bitmap);
-    g_free(client->info.name);
-    if (ret < 0) {
-        object_unref(OBJECT(sioc));
-        return ret;
-    }
-    if (x_dirty_bitmap && !client->info.base_allocation) {
-        error_setg(errp, "requested x-dirty-bitmap %s not found",
-                   x_dirty_bitmap);
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (client->info.flags & NBD_FLAG_READ_ONLY) {
-        ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-    if (client->info.flags & NBD_FLAG_SEND_FUA) {
-        bs->supported_write_flags = BDRV_REQ_FUA;
-        bs->supported_zero_flags |= BDRV_REQ_FUA;
-    }
-    if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
-        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
-    }
-
-    client->sioc = sioc;
-
-    if (!client->ioc) {
-        client->ioc = QIO_CHANNEL(sioc);
-        object_ref(OBJECT(client->ioc));
-    }
-
-    /* Now that we're connected, set the socket to be non-blocking and
-     * kick the reply mechanism.  */
-    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-    client->connection_co = qemu_coroutine_create(nbd_connection_entry, client);
-    bdrv_inc_in_flight(bs);
-    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
-    trace_nbd_client_connect_success(export);
-
-    return 0;
-
- fail:
-    /*
-     * We have connected, but must fail for other reasons. The
-     * connection is still blocking; send NBD_CMD_DISC as a courtesy
-     * to the server.
-     */
-    {
-        NBDRequest request = { .type = NBD_CMD_DISC };
-
-        nbd_send_request(client->ioc ?: QIO_CHANNEL(sioc), &request);
-
-        object_unref(OBJECT(sioc));
-
-        return ret;
-    }
-}
-
-int nbd_client_init(BlockDriverState *bs,
-                    SocketAddress *saddr,
-                    const char *export,
-                    QCryptoTLSCreds *tlscreds,
-                    const char *hostname,
-                    const char *x_dirty_bitmap,
-                    Error **errp)
-{
-    NBDClientSession *client = nbd_get_client_session(bs);
-
-    client->bs = bs;
-    qemu_co_mutex_init(&client->send_mutex);
-    qemu_co_queue_init(&client->free_sema);
-
-    return nbd_client_connect(bs, saddr, export, tlscreds, hostname,
-                              x_dirty_bitmap, errp);
-}
diff --git a/block/nbd-client.h b/block/nbd-client.h
deleted file mode 100644
index 570538f4c8..0000000000
--- a/block/nbd-client.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef NBD_CLIENT_H
-#define NBD_CLIENT_H
-
-#include "block/nbd.h"
-#include "block/block_int.h"
-#include "io/channel-socket.h"
-
-#define MAX_NBD_REQUESTS    16
-
-typedef struct {
-    Coroutine *coroutine;
-    uint64_t offset;        /* original offset of the request */
-    bool receiving;         /* waiting for connection_co? */
-} NBDClientRequest;
-
-typedef struct NBDClientSession {
-    QIOChannelSocket *sioc; /* The master data channel */
-    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
-    NBDExportInfo info;
-
-    CoMutex send_mutex;
-    CoQueue free_sema;
-    Coroutine *connection_co;
-    int in_flight;
-
-    NBDClientRequest requests[MAX_NBD_REQUESTS];
-    NBDReply reply;
-    BlockDriverState *bs;
-    bool quit;
-} NBDClientSession;
-
-NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
-
-int nbd_client_init(BlockDriverState *bs,
-                    SocketAddress *saddr,
-                    const char *export_name,
-                    QCryptoTLSCreds *tlscreds,
-                    const char *hostname,
-                    const char *x_dirty_bitmap,
-                    Error **errp);
-void nbd_client_close(BlockDriverState *bs);
-
-int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
-int nbd_client_co_flush(BlockDriverState *bs);
-int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, QEMUIOVector *qiov, int flags);
-int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                int bytes, BdrvRequestFlags flags);
-int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
-                         uint64_t bytes, QEMUIOVector *qiov, int flags);
-
-void nbd_client_detach_aio_context(BlockDriverState *bs);
-void nbd_client_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context);
-
-int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
-                                            bool want_zero,
-                                            int64_t offset, int64_t bytes,
-                                            int64_t *pnum, int64_t *map,
-                                            BlockDriverState **file);
-
-#endif /* NBD_CLIENT_H */
diff --git a/block/nbd.c b/block/nbd.c
index 208be59602..1f00be2d66 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -1,6 +1,7 @@
 /*
  * QEMU Block driver for  NBD
  *
+ * Copyright (C) 2016 Red Hat, Inc.
  * Copyright (C) 2008 Bull S.A.S.
  *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
  *
@@ -27,21 +28,46 @@
  */
 
 #include "qemu/osdep.h"
-#include "nbd-client.h"
-#include "block/qdict.h"
-#include "qapi/error.h"
+
+#include "trace.h"
 #include "qemu/uri.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
 #include "qemu/option.h"
-#include "qapi/qapi-visit-sockets.h"
-#include "qapi/qobject-input-visitor.h"
-#include "qapi/qobject-output-visitor.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qstring.h"
 #include "qemu/cutils.h"
 
+#include "qapi/qapi-visit-sockets.h"
+#include "qapi/qmp/qstring.h"
+
+#include "block/qdict.h"
+#include "block/nbd.h"
+#include "block/block_int.h"
+
 #define EN_OPTSTR ":exportname="
+#define MAX_NBD_REQUESTS    16
+
+#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs))
+#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ (uint64_t)(intptr_t)(bs))
+
+typedef struct {
+    Coroutine *coroutine;
+    uint64_t offset;        /* original offset of the request */
+    bool receiving;         /* waiting for connection_co? */
+} NBDClientRequest;
+
+typedef struct NBDClientSession {
+    QIOChannelSocket *sioc; /* The master data channel */
+    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
+    NBDExportInfo info;
+
+    CoMutex send_mutex;
+    CoQueue free_sema;
+    Coroutine *connection_co;
+    int in_flight;
+
+    NBDClientRequest requests[MAX_NBD_REQUESTS];
+    NBDReply reply;
+    BlockDriverState *bs;
+    bool quit;
+} NBDClientSession;
 
 typedef struct BDRVNBDState {
     NBDClientSession client;
@@ -51,6 +77,1218 @@ typedef struct BDRVNBDState {
     char *export, *tlscredsid;
 } BDRVNBDState;
 
+static NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
+{
+    BDRVNBDState *s = bs->opaque;
+    return &s->client;
+}
+
+
+static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
+{
+    int i;
+
+    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+        NBDClientRequest *req = &s->requests[i];
+
+        if (req->coroutine && req->receiving) {
+            aio_co_wake(req->coroutine);
+        }
+    }
+}
+
+static void nbd_client_detach_aio_context(BlockDriverState *bs)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
+}
+
+static void nbd_client_attach_aio_context_bh(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    NBDClientSession *client = nbd_get_client_session(bs);
+
+    /*
+     * The node is still drained, so we know the coroutine has yielded in
+     * nbd_read_eof(), the only place where bs->in_flight can reach 0, or it is
+     * entered for the first time. Both places are safe for entering the
+     * coroutine.
+     */
+    qemu_aio_coroutine_enter(bs->aio_context, client->connection_co);
+    bdrv_dec_in_flight(bs);
+}
+
+static void nbd_client_attach_aio_context(BlockDriverState *bs,
+                                          AioContext *new_context)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
+
+    bdrv_inc_in_flight(bs);
+
+    /*
+     * Need to wait here for the BH to run because the BH must run while the
+     * node is still drained.
+     */
+    aio_wait_bh_oneshot(new_context, nbd_client_attach_aio_context_bh, bs);
+}
+
+
+static void nbd_teardown_connection(BlockDriverState *bs)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+
+    assert(client->ioc);
+
+    /* finish any pending coroutines */
+    qio_channel_shutdown(client->ioc,
+                         QIO_CHANNEL_SHUTDOWN_BOTH,
+                         NULL);
+    BDRV_POLL_WHILE(bs, client->connection_co);
+
+    nbd_client_detach_aio_context(bs);
+    object_unref(OBJECT(client->sioc));
+    client->sioc = NULL;
+    object_unref(OBJECT(client->ioc));
+    client->ioc = NULL;
+}
+
+static coroutine_fn void nbd_connection_entry(void *opaque)
+{
+    NBDClientSession *s = opaque;
+    uint64_t i;
+    int ret = 0;
+    Error *local_err = NULL;
+
+    while (!s->quit) {
+        /*
+         * The NBD client can only really be considered idle when it has
+         * yielded from qio_channel_readv_all_eof(), waiting for data. This is
+         * the point where the additional scheduled coroutine entry happens
+         * after nbd_client_attach_aio_context().
+         *
+         * Therefore we keep an additional in_flight reference all the time and
+         * only drop it temporarily here.
+         */
+        assert(s->reply.handle == 0);
+        ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, &local_err);
+
+        if (local_err) {
+            trace_nbd_read_reply_entry_fail(ret, error_get_pretty(local_err));
+            error_free(local_err);
+        }
+        if (ret <= 0) {
+            break;
+        }
+
+        /*
+         * There's no need for a mutex on the receive side, because the
+         * handler acts as a synchronization point and ensures that only
+         * one coroutine is called until the reply finishes.
+         */
+        i = HANDLE_TO_INDEX(s, s->reply.handle);
+        if (i >= MAX_NBD_REQUESTS ||
+            !s->requests[i].coroutine ||
+            !s->requests[i].receiving ||
+            (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply))
+        {
+            break;
+        }
+
+        /*
+         * We're woken up again by the request itself.  Note that there
+         * is no race between yielding and reentering connection_co.  This
+         * is because:
+         *
+         * - if the request runs on the same AioContext, it is only
+         *   entered after we yield
+         *
+         * - if the request runs on a different AioContext, reentering
+         *   connection_co happens through a bottom half, which can only
+         *   run after we yield.
+         */
+        aio_co_wake(s->requests[i].coroutine);
+        qemu_coroutine_yield();
+    }
+
+    s->quit = true;
+    nbd_recv_coroutines_wake_all(s);
+    bdrv_dec_in_flight(s->bs);
+
+    s->connection_co = NULL;
+    aio_wait_kick();
+}
+
+static int nbd_co_send_request(BlockDriverState *bs,
+                               NBDRequest *request,
+                               QEMUIOVector *qiov)
+{
+    NBDClientSession *s = nbd_get_client_session(bs);
+    int rc, i;
+
+    qemu_co_mutex_lock(&s->send_mutex);
+    while (s->in_flight == MAX_NBD_REQUESTS) {
+        qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
+    }
+    s->in_flight++;
+
+    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+        if (s->requests[i].coroutine == NULL) {
+            break;
+        }
+    }
+
+    g_assert(qemu_in_coroutine());
+    assert(i < MAX_NBD_REQUESTS);
+
+    s->requests[i].coroutine = qemu_coroutine_self();
+    s->requests[i].offset = request->from;
+    s->requests[i].receiving = false;
+
+    request->handle = INDEX_TO_HANDLE(s, i);
+
+    if (s->quit) {
+        rc = -EIO;
+        goto err;
+    }
+    assert(s->ioc);
+
+    if (qiov) {
+        qio_channel_set_cork(s->ioc, true);
+        rc = nbd_send_request(s->ioc, request);
+        if (rc >= 0 && !s->quit) {
+            if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
+                                       NULL) < 0) {
+                rc = -EIO;
+            }
+        } else if (rc >= 0) {
+            rc = -EIO;
+        }
+        qio_channel_set_cork(s->ioc, false);
+    } else {
+        rc = nbd_send_request(s->ioc, request);
+    }
+
+err:
+    if (rc < 0) {
+        s->quit = true;
+        s->requests[i].coroutine = NULL;
+        s->in_flight--;
+        qemu_co_queue_next(&s->free_sema);
+    }
+    qemu_co_mutex_unlock(&s->send_mutex);
+    return rc;
+}
+
+static inline uint16_t payload_advance16(uint8_t **payload)
+{
+    *payload += 2;
+    return lduw_be_p(*payload - 2);
+}
+
+static inline uint32_t payload_advance32(uint8_t **payload)
+{
+    *payload += 4;
+    return ldl_be_p(*payload - 4);
+}
+
+static inline uint64_t payload_advance64(uint8_t **payload)
+{
+    *payload += 8;
+    return ldq_be_p(*payload - 8);
+}
+
+static int nbd_parse_offset_hole_payload(NBDClientSession *client,
+                                         NBDStructuredReplyChunk *chunk,
+                                         uint8_t *payload, uint64_t orig_offset,
+                                         QEMUIOVector *qiov, Error **errp)
+{
+    uint64_t offset;
+    uint32_t hole_size;
+
+    if (chunk->length != sizeof(offset) + sizeof(hole_size)) {
+        error_setg(errp, "Protocol error: invalid payload for "
+                         "NBD_REPLY_TYPE_OFFSET_HOLE");
+        return -EINVAL;
+    }
+
+    offset = payload_advance64(&payload);
+    hole_size = payload_advance32(&payload);
+
+    if (!hole_size || offset < orig_offset || hole_size > qiov->size ||
+        offset > orig_offset + qiov->size - hole_size) {
+        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
+                         " region");
+        return -EINVAL;
+    }
+    if (client->info.min_block &&
+        !QEMU_IS_ALIGNED(hole_size, client->info.min_block)) {
+        trace_nbd_structured_read_compliance("hole");
+    }
+
+    qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size);
+
+    return 0;
+}
+
+/*
+ * nbd_parse_blockstatus_payload
+ * Based on our request, we expect only one extent in reply, for the
+ * base:allocation context.
+ */
+static int nbd_parse_blockstatus_payload(NBDClientSession *client,
+                                         NBDStructuredReplyChunk *chunk,
+                                         uint8_t *payload, uint64_t orig_length,
+                                         NBDExtent *extent, Error **errp)
+{
+    uint32_t context_id;
+
+    /* The server succeeded, so it must have sent [at least] one extent */
+    if (chunk->length < sizeof(context_id) + sizeof(*extent)) {
+        error_setg(errp, "Protocol error: invalid payload for "
+                         "NBD_REPLY_TYPE_BLOCK_STATUS");
+        return -EINVAL;
+    }
+
+    context_id = payload_advance32(&payload);
+    if (client->info.context_id != context_id) {
+        error_setg(errp, "Protocol error: unexpected context id %d for "
+                         "NBD_REPLY_TYPE_BLOCK_STATUS, when negotiated context "
+                         "id is %d", context_id,
+                         client->info.context_id);
+        return -EINVAL;
+    }
+
+    extent->length = payload_advance32(&payload);
+    extent->flags = payload_advance32(&payload);
+
+    if (extent->length == 0) {
+        error_setg(errp, "Protocol error: server sent status chunk with "
+                   "zero length");
+        return -EINVAL;
+    }
+
+    /*
+     * A server sending unaligned block status is in violation of the
+     * protocol, but as qemu-nbd 3.1 is such a server (at least for
+     * POSIX files that are not a multiple of 512 bytes, since qemu
+     * rounds files up to 512-byte multiples but lseek(SEEK_HOLE)
+     * still sees an implicit hole beyond the real EOF), it's nicer to
+     * work around the misbehaving server. If the request included
+     * more than the final unaligned block, truncate it back to an
+     * aligned result; if the request was only the final block, round
+     * up to the full block and change the status to fully-allocated
+     * (always a safe status, even if it loses information).
+     */
+    if (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
+                                                   client->info.min_block)) {
+        trace_nbd_parse_blockstatus_compliance("extent length is unaligned");
+        if (extent->length > client->info.min_block) {
+            extent->length = QEMU_ALIGN_DOWN(extent->length,
+                                             client->info.min_block);
+        } else {
+            extent->length = client->info.min_block;
+            extent->flags = 0;
+        }
+    }
+
+    /*
+     * We used NBD_CMD_FLAG_REQ_ONE, so the server should not have
+     * sent us any more than one extent, nor should it have included
+     * status beyond our request in that extent. However, it's easy
+     * enough to ignore the server's noncompliance without killing the
+     * connection; just ignore trailing extents, and clamp things to
+     * the length of our request.
+     */
+    if (chunk->length > sizeof(context_id) + sizeof(*extent)) {
+        trace_nbd_parse_blockstatus_compliance("more than one extent");
+    }
+    if (extent->length > orig_length) {
+        extent->length = orig_length;
+        trace_nbd_parse_blockstatus_compliance("extent length too large");
+    }
+
+    return 0;
+}
+
+/*
+ * nbd_parse_error_payload
+ * on success @errp contains message describing nbd error reply
+ */
+static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk,
+                                   uint8_t *payload, int *request_ret,
+                                   Error **errp)
+{
+    uint32_t error;
+    uint16_t message_size;
+
+    assert(chunk->type & (1 << 15));
+
+    if (chunk->length < sizeof(error) + sizeof(message_size)) {
+        error_setg(errp,
+                   "Protocol error: invalid payload for structured error");
+        return -EINVAL;
+    }
+
+    error = nbd_errno_to_system_errno(payload_advance32(&payload));
+    if (error == 0) {
+        error_setg(errp, "Protocol error: server sent structured error chunk "
+                         "with error = 0");
+        return -EINVAL;
+    }
+
+    *request_ret = -error;
+    message_size = payload_advance16(&payload);
+
+    if (message_size > chunk->length - sizeof(error) - sizeof(message_size)) {
+        error_setg(errp, "Protocol error: server sent structured error chunk "
+                         "with incorrect message size");
+        return -EINVAL;
+    }
+
+    /* TODO: Add a trace point to mention the server complaint */
+
+    /* TODO handle ERROR_OFFSET */
+
+    return 0;
+}
+
+static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
+                                              uint64_t orig_offset,
+                                              QEMUIOVector *qiov, Error **errp)
+{
+    QEMUIOVector sub_qiov;
+    uint64_t offset;
+    size_t data_size;
+    int ret;
+    NBDStructuredReplyChunk *chunk = &s->reply.structured;
+
+    assert(nbd_reply_is_structured(&s->reply));
+
+    /* The NBD spec requires at least one byte of payload */
+    if (chunk->length <= sizeof(offset)) {
+        error_setg(errp, "Protocol error: invalid payload for "
+                         "NBD_REPLY_TYPE_OFFSET_DATA");
+        return -EINVAL;
+    }
+
+    if (nbd_read64(s->ioc, &offset, "OFFSET_DATA offset", errp) < 0) {
+        return -EIO;
+    }
+
+    data_size = chunk->length - sizeof(offset);
+    assert(data_size);
+    if (offset < orig_offset || data_size > qiov->size ||
+        offset > orig_offset + qiov->size - data_size) {
+        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
+                         " region");
+        return -EINVAL;
+    }
+    if (s->info.min_block && !QEMU_IS_ALIGNED(data_size, s->info.min_block)) {
+        trace_nbd_structured_read_compliance("data");
+    }
+
+    qemu_iovec_init(&sub_qiov, qiov->niov);
+    qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size);
+    ret = qio_channel_readv_all(s->ioc, sub_qiov.iov, sub_qiov.niov, errp);
+    qemu_iovec_destroy(&sub_qiov);
+
+    return ret < 0 ? -EIO : 0;
+}
+
+#define NBD_MAX_MALLOC_PAYLOAD 1000
+static coroutine_fn int nbd_co_receive_structured_payload(
+        NBDClientSession *s, void **payload, Error **errp)
+{
+    int ret;
+    uint32_t len;
+
+    assert(nbd_reply_is_structured(&s->reply));
+
+    len = s->reply.structured.length;
+
+    if (len == 0) {
+        return 0;
+    }
+
+    if (payload == NULL) {
+        error_setg(errp, "Unexpected structured payload");
+        return -EINVAL;
+    }
+
+    if (len > NBD_MAX_MALLOC_PAYLOAD) {
+        error_setg(errp, "Payload too large");
+        return -EINVAL;
+    }
+
+    *payload = g_new(char, len);
+    ret = nbd_read(s->ioc, *payload, len, "structured payload", errp);
+    if (ret < 0) {
+        g_free(*payload);
+        *payload = NULL;
+        return ret;
+    }
+
+    return 0;
+}
+
+/*
+ * nbd_co_do_receive_one_chunk
+ * for simple reply:
+ *   set request_ret to received reply error
+ *   if qiov is not NULL: read payload to @qiov
+ * for structured reply chunk:
+ *   if error chunk: read payload, set @request_ret, do not set @payload
+ *   else if offset_data chunk: read payload data to @qiov, do not set @payload
+ *   else: read payload to @payload
+ *
+ * If function fails, @errp contains corresponding error message, and the
+ * connection with the server is suspect.  If it returns 0, then the
+ * transaction succeeded (although @request_ret may be a negative errno
+ * corresponding to the server's error reply), and errp is unchanged.
+ */
+static coroutine_fn int nbd_co_do_receive_one_chunk(
+        NBDClientSession *s, uint64_t handle, bool only_structured,
+        int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp)
+{
+    int ret;
+    int i = HANDLE_TO_INDEX(s, handle);
+    void *local_payload = NULL;
+    NBDStructuredReplyChunk *chunk;
+
+    if (payload) {
+        *payload = NULL;
+    }
+    *request_ret = 0;
+
+    /* Wait until we're woken up by nbd_connection_entry.  */
+    s->requests[i].receiving = true;
+    qemu_coroutine_yield();
+    s->requests[i].receiving = false;
+    if (s->quit) {
+        error_setg(errp, "Connection closed");
+        return -EIO;
+    }
+    assert(s->ioc);
+
+    assert(s->reply.handle == handle);
+
+    if (nbd_reply_is_simple(&s->reply)) {
+        if (only_structured) {
+            error_setg(errp, "Protocol error: simple reply when structured "
+                             "reply chunk was expected");
+            return -EINVAL;
+        }
+
+        *request_ret = -nbd_errno_to_system_errno(s->reply.simple.error);
+        if (*request_ret < 0 || !qiov) {
+            return 0;
+        }
+
+        return qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
+                                     errp) < 0 ? -EIO : 0;
+    }
+
+    /* handle structured reply chunk */
+    assert(s->info.structured_reply);
+    chunk = &s->reply.structured;
+
+    if (chunk->type == NBD_REPLY_TYPE_NONE) {
+        if (!(chunk->flags & NBD_REPLY_FLAG_DONE)) {
+            error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk without"
+                       " NBD_REPLY_FLAG_DONE flag set");
+            return -EINVAL;
+        }
+        if (chunk->length) {
+            error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk with"
+                       " nonzero length");
+            return -EINVAL;
+        }
+        return 0;
+    }
+
+    if (chunk->type == NBD_REPLY_TYPE_OFFSET_DATA) {
+        if (!qiov) {
+            error_setg(errp, "Unexpected NBD_REPLY_TYPE_OFFSET_DATA chunk");
+            return -EINVAL;
+        }
+
+        return nbd_co_receive_offset_data_payload(s, s->requests[i].offset,
+                                                  qiov, errp);
+    }
+
+    if (nbd_reply_type_is_error(chunk->type)) {
+        payload = &local_payload;
+    }
+
+    ret = nbd_co_receive_structured_payload(s, payload, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (nbd_reply_type_is_error(chunk->type)) {
+        ret = nbd_parse_error_payload(chunk, local_payload, request_ret, errp);
+        g_free(local_payload);
+        return ret;
+    }
+
+    return 0;
+}
+
+/*
+ * nbd_co_receive_one_chunk
+ * Read reply, wake up connection_co and set s->quit if needed.
+ * Return value is a fatal error code or normal nbd reply error code
+ */
+static coroutine_fn int nbd_co_receive_one_chunk(
+        NBDClientSession *s, uint64_t handle, bool only_structured,
+        int *request_ret, QEMUIOVector *qiov, NBDReply *reply, void **payload,
+        Error **errp)
+{
+    int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured,
+                                          request_ret, qiov, payload, errp);
+
+    if (ret < 0) {
+        s->quit = true;
+    } else {
+        /* For assert at loop start in nbd_connection_entry */
+        if (reply) {
+            *reply = s->reply;
+        }
+        s->reply.handle = 0;
+    }
+
+    if (s->connection_co) {
+        aio_co_wake(s->connection_co);
+    }
+
+    return ret;
+}
+
+typedef struct NBDReplyChunkIter {
+    int ret;
+    int request_ret;
+    Error *err;
+    bool done, only_structured;
+} NBDReplyChunkIter;
+
+static void nbd_iter_channel_error(NBDReplyChunkIter *iter,
+                                   int ret, Error **local_err)
+{
+    assert(ret < 0);
+
+    if (!iter->ret) {
+        iter->ret = ret;
+        error_propagate(&iter->err, *local_err);
+    } else {
+        error_free(*local_err);
+    }
+
+    *local_err = NULL;
+}
+
+static void nbd_iter_request_error(NBDReplyChunkIter *iter, int ret)
+{
+    assert(ret < 0);
+
+    if (!iter->request_ret) {
+        iter->request_ret = ret;
+    }
+}
+
+/*
+ * NBD_FOREACH_REPLY_CHUNK
+ * The pointer stored in @payload requires g_free() to free it.
+ */
+#define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \
+                                qiov, reply, payload) \
+    for (iter = (NBDReplyChunkIter) { .only_structured = structured }; \
+         nbd_reply_chunk_iter_receive(s, &iter, handle, qiov, reply, payload);)
+
+/*
+ * nbd_reply_chunk_iter_receive
+ * The pointer stored in @payload requires g_free() to free it.
+ */
+static bool nbd_reply_chunk_iter_receive(NBDClientSession *s,
+                                         NBDReplyChunkIter *iter,
+                                         uint64_t handle,
+                                         QEMUIOVector *qiov, NBDReply *reply,
+                                         void **payload)
+{
+    int ret, request_ret;
+    NBDReply local_reply;
+    NBDStructuredReplyChunk *chunk;
+    Error *local_err = NULL;
+    if (s->quit) {
+        error_setg(&local_err, "Connection closed");
+        nbd_iter_channel_error(iter, -EIO, &local_err);
+        goto break_loop;
+    }
+
+    if (iter->done) {
+        /* Previous iteration was last. */
+        goto break_loop;
+    }
+
+    if (reply == NULL) {
+        reply = &local_reply;
+    }
+
+    ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured,
+                                   &request_ret, qiov, reply, payload,
+                                   &local_err);
+    if (ret < 0) {
+        nbd_iter_channel_error(iter, ret, &local_err);
+    } else if (request_ret < 0) {
+        nbd_iter_request_error(iter, request_ret);
+    }
+
+    /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
+    if (nbd_reply_is_simple(reply) || s->quit) {
+        goto break_loop;
+    }
+
+    chunk = &reply->structured;
+    iter->only_structured = true;
+
+    if (chunk->type == NBD_REPLY_TYPE_NONE) {
+        /* NBD_REPLY_FLAG_DONE is already checked in nbd_co_receive_one_chunk */
+        assert(chunk->flags & NBD_REPLY_FLAG_DONE);
+        goto break_loop;
+    }
+
+    if (chunk->flags & NBD_REPLY_FLAG_DONE) {
+        /* This iteration is last. */
+        iter->done = true;
+    }
+
+    /* Execute the loop body */
+    return true;
+
+break_loop:
+    s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL;
+
+    qemu_co_mutex_lock(&s->send_mutex);
+    s->in_flight--;
+    qemu_co_queue_next(&s->free_sema);
+    qemu_co_mutex_unlock(&s->send_mutex);
+
+    return false;
+}
+
+static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
+                                      int *request_ret, Error **errp)
+{
+    NBDReplyChunkIter iter;
+
+    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, NULL, NULL) {
+        /* nbd_reply_chunk_iter_receive does all the work */
+    }
+
+    error_propagate(errp, iter.err);
+    *request_ret = iter.request_ret;
+    return iter.ret;
+}
+
+static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
+                                        uint64_t offset, QEMUIOVector *qiov,
+                                        int *request_ret, Error **errp)
+{
+    NBDReplyChunkIter iter;
+    NBDReply reply;
+    void *payload = NULL;
+    Error *local_err = NULL;
+
+    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
+                            qiov, &reply, &payload)
+    {
+        int ret;
+        NBDStructuredReplyChunk *chunk = &reply.structured;
+
+        assert(nbd_reply_is_structured(&reply));
+
+        switch (chunk->type) {
+        case NBD_REPLY_TYPE_OFFSET_DATA:
+            /*
+             * special cased in nbd_co_receive_one_chunk, data is already
+             * in qiov
+             */
+            break;
+        case NBD_REPLY_TYPE_OFFSET_HOLE:
+            ret = nbd_parse_offset_hole_payload(s, &reply.structured, payload,
+                                                offset, qiov, &local_err);
+            if (ret < 0) {
+                s->quit = true;
+                nbd_iter_channel_error(&iter, ret, &local_err);
+            }
+            break;
+        default:
+            if (!nbd_reply_type_is_error(chunk->type)) {
+                /* not allowed reply type */
+                s->quit = true;
+                error_setg(&local_err,
+                           "Unexpected reply type: %d (%s) for CMD_READ",
+                           chunk->type, nbd_reply_type_lookup(chunk->type));
+                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
+            }
+        }
+
+        g_free(payload);
+        payload = NULL;
+    }
+
+    error_propagate(errp, iter.err);
+    *request_ret = iter.request_ret;
+    return iter.ret;
+}
+
+static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
+                                            uint64_t handle, uint64_t length,
+                                            NBDExtent *extent,
+                                            int *request_ret, Error **errp)
+{
+    NBDReplyChunkIter iter;
+    NBDReply reply;
+    void *payload = NULL;
+    Error *local_err = NULL;
+    bool received = false;
+
+    assert(!extent->length);
+    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, &reply, &payload) {
+        int ret;
+        NBDStructuredReplyChunk *chunk = &reply.structured;
+
+        assert(nbd_reply_is_structured(&reply));
+
+        switch (chunk->type) {
+        case NBD_REPLY_TYPE_BLOCK_STATUS:
+            if (received) {
+                s->quit = true;
+                error_setg(&local_err, "Several BLOCK_STATUS chunks in reply");
+                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
+            }
+            received = true;
+
+            ret = nbd_parse_blockstatus_payload(s, &reply.structured,
+                                                payload, length, extent,
+                                                &local_err);
+            if (ret < 0) {
+                s->quit = true;
+                nbd_iter_channel_error(&iter, ret, &local_err);
+            }
+            break;
+        default:
+            if (!nbd_reply_type_is_error(chunk->type)) {
+                s->quit = true;
+                error_setg(&local_err,
+                           "Unexpected reply type: %d (%s) "
+                           "for CMD_BLOCK_STATUS",
+                           chunk->type, nbd_reply_type_lookup(chunk->type));
+                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
+            }
+        }
+
+        g_free(payload);
+        payload = NULL;
+    }
+
+    if (!extent->length && !iter.request_ret) {
+        error_setg(&local_err, "Server did not reply with any status extents");
+        nbd_iter_channel_error(&iter, -EIO, &local_err);
+    }
+
+    error_propagate(errp, iter.err);
+    *request_ret = iter.request_ret;
+    return iter.ret;
+}
+
+static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
+                          QEMUIOVector *write_qiov)
+{
+    int ret, request_ret;
+    Error *local_err = NULL;
+    NBDClientSession *client = nbd_get_client_session(bs);
+
+    assert(request->type != NBD_CMD_READ);
+    if (write_qiov) {
+        assert(request->type == NBD_CMD_WRITE);
+        assert(request->len == iov_size(write_qiov->iov, write_qiov->niov));
+    } else {
+        assert(request->type != NBD_CMD_WRITE);
+    }
+    ret = nbd_co_send_request(bs, request, write_qiov);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = nbd_co_receive_return_code(client, request->handle,
+                                     &request_ret, &local_err);
+    if (local_err) {
+        trace_nbd_co_request_fail(request->from, request->len, request->handle,
+                                  request->flags, request->type,
+                                  nbd_cmd_lookup(request->type),
+                                  ret, error_get_pretty(local_err));
+        error_free(local_err);
+    }
+    return ret ? ret : request_ret;
+}
+
+static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+    int ret, request_ret;
+    Error *local_err = NULL;
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = {
+        .type = NBD_CMD_READ,
+        .from = offset,
+        .len = bytes,
+    };
+
+    assert(bytes <= NBD_MAX_BUFFER_SIZE);
+    assert(!flags);
+
+    if (!bytes) {
+        return 0;
+    }
+    /*
+     * Work around the fact that the block layer doesn't do
+     * byte-accurate sizing yet - if the read exceeds the server's
+     * advertised size because the block layer rounded size up, then
+     * truncate the request to the server and tail-pad with zero.
+     */
+    if (offset >= client->info.size) {
+        assert(bytes < BDRV_SECTOR_SIZE);
+        qemu_iovec_memset(qiov, 0, 0, bytes);
+        return 0;
+    }
+    if (offset + bytes > client->info.size) {
+        uint64_t slop = offset + bytes - client->info.size;
+
+        assert(slop < BDRV_SECTOR_SIZE);
+        qemu_iovec_memset(qiov, bytes - slop, 0, slop);
+        request.len -= slop;
+    }
+
+    ret = nbd_co_send_request(bs, &request, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
+                                       &request_ret, &local_err);
+    if (local_err) {
+        trace_nbd_co_request_fail(request.from, request.len, request.handle,
+                                  request.flags, request.type,
+                                  nbd_cmd_lookup(request.type),
+                                  ret, error_get_pretty(local_err));
+        error_free(local_err);
+    }
+    return ret ? ret : request_ret;
+}
+
+static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                 uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = {
+        .type = NBD_CMD_WRITE,
+        .from = offset,
+        .len = bytes,
+    };
+
+    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
+    if (flags & BDRV_REQ_FUA) {
+        assert(client->info.flags & NBD_FLAG_SEND_FUA);
+        request.flags |= NBD_CMD_FLAG_FUA;
+    }
+
+    assert(bytes <= NBD_MAX_BUFFER_SIZE);
+
+    if (!bytes) {
+        return 0;
+    }
+    return nbd_co_request(bs, &request, qiov);
+}
+
+static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                                       int bytes, BdrvRequestFlags flags)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = {
+        .type = NBD_CMD_WRITE_ZEROES,
+        .from = offset,
+        .len = bytes,
+    };
+
+    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
+    if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
+        return -ENOTSUP;
+    }
+
+    if (flags & BDRV_REQ_FUA) {
+        assert(client->info.flags & NBD_FLAG_SEND_FUA);
+        request.flags |= NBD_CMD_FLAG_FUA;
+    }
+    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+        request.flags |= NBD_CMD_FLAG_NO_HOLE;
+    }
+
+    if (!bytes) {
+        return 0;
+    }
+    return nbd_co_request(bs, &request, NULL);
+}
+
+static int nbd_client_co_flush(BlockDriverState *bs)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = { .type = NBD_CMD_FLUSH };
+
+    if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
+        return 0;
+    }
+
+    request.from = 0;
+    request.len = 0;
+
+    return nbd_co_request(bs, &request, NULL);
+}
+
+static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset,
+                                  int bytes)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = {
+        .type = NBD_CMD_TRIM,
+        .from = offset,
+        .len = bytes,
+    };
+
+    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
+    if (!(client->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
+        return 0;
+    }
+
+    return nbd_co_request(bs, &request, NULL);
+}
+
+static int coroutine_fn nbd_client_co_block_status(
+        BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+        int64_t *pnum, int64_t *map, BlockDriverState **file)
+{
+    int ret, request_ret;
+    NBDExtent extent = { 0 };
+    NBDClientSession *client = nbd_get_client_session(bs);
+    Error *local_err = NULL;
+
+    NBDRequest request = {
+        .type = NBD_CMD_BLOCK_STATUS,
+        .from = offset,
+        .len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
+                                                bs->bl.request_alignment),
+                                client->info.max_block),
+                   MIN(bytes, client->info.size - offset)),
+        .flags = NBD_CMD_FLAG_REQ_ONE,
+    };
+
+    if (!client->info.base_allocation) {
+        *pnum = bytes;
+        *map = offset;
+        *file = bs;
+        return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+    }
+
+    /*
+     * Work around the fact that the block layer doesn't do
+     * byte-accurate sizing yet - if the status request exceeds the
+     * server's advertised size because the block layer rounded size
+     * up, we truncated the request to the server (above), or are
+     * called on just the hole.
+     */
+    if (offset >= client->info.size) {
+        *pnum = bytes;
+        assert(bytes < BDRV_SECTOR_SIZE);
+        /* Intentionally don't report offset_valid for the hole */
+        return BDRV_BLOCK_ZERO;
+    }
+
+    if (client->info.min_block) {
+        assert(QEMU_IS_ALIGNED(request.len, client->info.min_block));
+    }
+    ret = nbd_co_send_request(bs, &request, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = nbd_co_receive_blockstatus_reply(client, request.handle, bytes,
+                                           &extent, &request_ret, &local_err);
+    if (local_err) {
+        trace_nbd_co_request_fail(request.from, request.len, request.handle,
+                                  request.flags, request.type,
+                                  nbd_cmd_lookup(request.type),
+                                  ret, error_get_pretty(local_err));
+        error_free(local_err);
+    }
+    if (ret < 0 || request_ret < 0) {
+        return ret ? ret : request_ret;
+    }
+
+    assert(extent.length);
+    *pnum = extent.length;
+    *map = offset;
+    *file = bs;
+    return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) |
+        (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0) |
+        BDRV_BLOCK_OFFSET_VALID;
+}
+
+static void nbd_client_close(BlockDriverState *bs)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = { .type = NBD_CMD_DISC };
+
+    assert(client->ioc);
+
+    nbd_send_request(client->ioc, &request);
+
+    nbd_teardown_connection(bs);
+}
+
+static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
+                                                  Error **errp)
+{
+    QIOChannelSocket *sioc;
+    Error *local_err = NULL;
+
+    sioc = qio_channel_socket_new();
+    qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
+
+    qio_channel_socket_connect_sync(sioc, saddr, &local_err);
+    if (local_err) {
+        object_unref(OBJECT(sioc));
+        error_propagate(errp, local_err);
+        return NULL;
+    }
+
+    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
+
+    return sioc;
+}
+
+static int nbd_client_connect(BlockDriverState *bs,
+                              SocketAddress *saddr,
+                              const char *export,
+                              QCryptoTLSCreds *tlscreds,
+                              const char *hostname,
+                              const char *x_dirty_bitmap,
+                              Error **errp)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+    int ret;
+
+    /*
+     * establish TCP connection, return error if it fails
+     * TODO: Configurable retry-until-timeout behaviour.
+     */
+    QIOChannelSocket *sioc = nbd_establish_connection(saddr, errp);
+
+    if (!sioc) {
+        return -ECONNREFUSED;
+    }
+
+    /* NBD handshake */
+    trace_nbd_client_connect(export);
+    qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
+
+    client->info.request_sizes = true;
+    client->info.structured_reply = true;
+    client->info.base_allocation = true;
+    client->info.x_dirty_bitmap = g_strdup(x_dirty_bitmap);
+    client->info.name = g_strdup(export ?: "");
+    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), tlscreds, hostname,
+                                &client->ioc, &client->info, errp);
+    g_free(client->info.x_dirty_bitmap);
+    g_free(client->info.name);
+    if (ret < 0) {
+        object_unref(OBJECT(sioc));
+        return ret;
+    }
+    if (x_dirty_bitmap && !client->info.base_allocation) {
+        error_setg(errp, "requested x-dirty-bitmap %s not found",
+                   x_dirty_bitmap);
+        ret = -EINVAL;
+        goto fail;
+    }
+    if (client->info.flags & NBD_FLAG_READ_ONLY) {
+        ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+    if (client->info.flags & NBD_FLAG_SEND_FUA) {
+        bs->supported_write_flags = BDRV_REQ_FUA;
+        bs->supported_zero_flags |= BDRV_REQ_FUA;
+    }
+    if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
+        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
+    }
+
+    client->sioc = sioc;
+
+    if (!client->ioc) {
+        client->ioc = QIO_CHANNEL(sioc);
+        object_ref(OBJECT(client->ioc));
+    }
+
+    /*
+     * Now that we're connected, set the socket to be non-blocking and
+     * kick the reply mechanism.
+     */
+    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
+    client->connection_co = qemu_coroutine_create(nbd_connection_entry, client);
+    bdrv_inc_in_flight(bs);
+    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
+
+    trace_nbd_client_connect_success(export);
+
+    return 0;
+
+ fail:
+    /*
+     * We have connected, but must fail for other reasons. The
+     * connection is still blocking; send NBD_CMD_DISC as a courtesy
+     * to the server.
+     */
+    {
+        NBDRequest request = { .type = NBD_CMD_DISC };
+
+        nbd_send_request(client->ioc ?: QIO_CHANNEL(sioc), &request);
+
+        object_unref(OBJECT(sioc));
+
+        return ret;
+    }
+}
+
+static int nbd_client_init(BlockDriverState *bs,
+                           SocketAddress *saddr,
+                           const char *export,
+                           QCryptoTLSCreds *tlscreds,
+                           const char *hostname,
+                           const char *x_dirty_bitmap,
+                           Error **errp)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+
+    client->bs = bs;
+    qemu_co_mutex_init(&client->send_mutex);
+    qemu_co_queue_init(&client->free_sema);
+
+    return nbd_client_connect(bs, saddr, export, tlscreds, hostname,
+                              x_dirty_bitmap, errp);
+}
+
 static int nbd_parse_uri(const char *filename, QDict *options)
 {
     URI *uri;
@@ -289,12 +1527,6 @@ done:
     return saddr;
 }
 
-NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
-{
-    BDRVNBDState *s = bs->opaque;
-    return &s->client;
-}
-
 static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
 {
     Object *obj;
@@ -483,17 +1715,6 @@ static int64_t nbd_getlength(BlockDriverState *bs)
     return s->client.info.size;
 }
 
-static void nbd_detach_aio_context(BlockDriverState *bs)
-{
-    nbd_client_detach_aio_context(bs);
-}
-
-static void nbd_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-    nbd_client_attach_aio_context(bs, new_context);
-}
-
 static void nbd_refresh_filename(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
@@ -559,8 +1780,8 @@ static BlockDriver bdrv_nbd = {
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
     .bdrv_refresh_limits        = nbd_refresh_limits,
     .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_attach_aio_context,
+    .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
     .bdrv_refresh_filename      = nbd_refresh_filename,
     .bdrv_co_block_status       = nbd_client_co_block_status,
     .bdrv_dirname               = nbd_dirname,
@@ -581,8 +1802,8 @@ static BlockDriver bdrv_nbd_tcp = {
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
     .bdrv_refresh_limits        = nbd_refresh_limits,
     .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_attach_aio_context,
+    .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
     .bdrv_refresh_filename      = nbd_refresh_filename,
     .bdrv_co_block_status       = nbd_client_co_block_status,
     .bdrv_dirname               = nbd_dirname,
@@ -603,8 +1824,8 @@ static BlockDriver bdrv_nbd_unix = {
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
     .bdrv_refresh_limits        = nbd_refresh_limits,
     .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_attach_aio_context,
+    .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
     .bdrv_refresh_filename      = nbd_refresh_filename,
     .bdrv_co_block_status       = nbd_client_co_block_status,
     .bdrv_dirname               = nbd_dirname,
diff --git a/block/trace-events b/block/trace-events
index 01fa5eb081..f6e43ee023 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -160,7 +160,7 @@ nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pa
 # iscsi.c
 iscsi_xcopy(void *src_lun, uint64_t src_off, void *dst_lun, uint64_t dst_off, uint64_t bytes, int ret) "src_lun %p offset %"PRIu64" dst_lun %p offset %"PRIu64" bytes %"PRIu64" ret %d"
 
-# nbd-client.c
+# nbd.c
 nbd_parse_blockstatus_compliance(const char *err) "ignoring extra data from non-compliant server: %s"
 nbd_structured_read_compliance(const char *type) "server sent non-compliant unaligned read %s chunk"
 nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s"

From 611ae1d71647c58b23dc2c116a946884ce30abf8 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Tue, 11 Jun 2019 13:27:20 +0300
Subject: [PATCH 9/9] block/nbd: merge NBDClientSession struct back to
 BDRVNBDState

No reason to keep it separate, it differs from others block driver
behavior and therefore confuses. Instead of generic
  'state = (State*)bs->opaque' we have to use special helper.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20190611102720.86114-4-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 block/nbd.c | 197 +++++++++++++++++++++++++---------------------------
 1 file changed, 94 insertions(+), 103 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 1f00be2d66..81edabbf35 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -53,7 +53,7 @@ typedef struct {
     bool receiving;         /* waiting for connection_co? */
 } NBDClientRequest;
 
-typedef struct NBDClientSession {
+typedef struct BDRVNBDState {
     QIOChannelSocket *sioc; /* The master data channel */
     QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
     NBDExportInfo info;
@@ -67,24 +67,13 @@ typedef struct NBDClientSession {
     NBDReply reply;
     BlockDriverState *bs;
     bool quit;
-} NBDClientSession;
-
-typedef struct BDRVNBDState {
-    NBDClientSession client;
 
     /* For nbd_refresh_filename() */
     SocketAddress *saddr;
     char *export, *tlscredsid;
 } BDRVNBDState;
 
-static NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
-{
-    BDRVNBDState *s = bs->opaque;
-    return &s->client;
-}
-
-
-static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
+static void nbd_recv_coroutines_wake_all(BDRVNBDState *s)
 {
     int i;
 
@@ -99,14 +88,15 @@ static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
 
 static void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
 }
 
 static void nbd_client_attach_aio_context_bh(void *opaque)
 {
     BlockDriverState *bs = opaque;
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
     /*
      * The node is still drained, so we know the coroutine has yielded in
@@ -114,15 +104,16 @@ static void nbd_client_attach_aio_context_bh(void *opaque)
      * entered for the first time. Both places are safe for entering the
      * coroutine.
      */
-    qemu_aio_coroutine_enter(bs->aio_context, client->connection_co);
+    qemu_aio_coroutine_enter(bs->aio_context, s->connection_co);
     bdrv_dec_in_flight(bs);
 }
 
 static void nbd_client_attach_aio_context(BlockDriverState *bs,
                                           AioContext *new_context)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    qio_channel_attach_aio_context(QIO_CHANNEL(s->ioc), new_context);
 
     bdrv_inc_in_flight(bs);
 
@@ -136,26 +127,26 @@ static void nbd_client_attach_aio_context(BlockDriverState *bs,
 
 static void nbd_teardown_connection(BlockDriverState *bs)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
-    assert(client->ioc);
+    assert(s->ioc);
 
     /* finish any pending coroutines */
-    qio_channel_shutdown(client->ioc,
+    qio_channel_shutdown(s->ioc,
                          QIO_CHANNEL_SHUTDOWN_BOTH,
                          NULL);
-    BDRV_POLL_WHILE(bs, client->connection_co);
+    BDRV_POLL_WHILE(bs, s->connection_co);
 
     nbd_client_detach_aio_context(bs);
-    object_unref(OBJECT(client->sioc));
-    client->sioc = NULL;
-    object_unref(OBJECT(client->ioc));
-    client->ioc = NULL;
+    object_unref(OBJECT(s->sioc));
+    s->sioc = NULL;
+    object_unref(OBJECT(s->ioc));
+    s->ioc = NULL;
 }
 
 static coroutine_fn void nbd_connection_entry(void *opaque)
 {
-    NBDClientSession *s = opaque;
+    BDRVNBDState *s = opaque;
     uint64_t i;
     int ret = 0;
     Error *local_err = NULL;
@@ -223,7 +214,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
                                NBDRequest *request,
                                QEMUIOVector *qiov)
 {
-    NBDClientSession *s = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     int rc, i;
 
     qemu_co_mutex_lock(&s->send_mutex);
@@ -298,7 +289,7 @@ static inline uint64_t payload_advance64(uint8_t **payload)
     return ldq_be_p(*payload - 8);
 }
 
-static int nbd_parse_offset_hole_payload(NBDClientSession *client,
+static int nbd_parse_offset_hole_payload(BDRVNBDState *s,
                                          NBDStructuredReplyChunk *chunk,
                                          uint8_t *payload, uint64_t orig_offset,
                                          QEMUIOVector *qiov, Error **errp)
@@ -321,8 +312,8 @@ static int nbd_parse_offset_hole_payload(NBDClientSession *client,
                          " region");
         return -EINVAL;
     }
-    if (client->info.min_block &&
-        !QEMU_IS_ALIGNED(hole_size, client->info.min_block)) {
+    if (s->info.min_block &&
+        !QEMU_IS_ALIGNED(hole_size, s->info.min_block)) {
         trace_nbd_structured_read_compliance("hole");
     }
 
@@ -336,7 +327,7 @@ static int nbd_parse_offset_hole_payload(NBDClientSession *client,
  * Based on our request, we expect only one extent in reply, for the
  * base:allocation context.
  */
-static int nbd_parse_blockstatus_payload(NBDClientSession *client,
+static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
                                          NBDStructuredReplyChunk *chunk,
                                          uint8_t *payload, uint64_t orig_length,
                                          NBDExtent *extent, Error **errp)
@@ -351,11 +342,11 @@ static int nbd_parse_blockstatus_payload(NBDClientSession *client,
     }
 
     context_id = payload_advance32(&payload);
-    if (client->info.context_id != context_id) {
+    if (s->info.context_id != context_id) {
         error_setg(errp, "Protocol error: unexpected context id %d for "
                          "NBD_REPLY_TYPE_BLOCK_STATUS, when negotiated context "
                          "id is %d", context_id,
-                         client->info.context_id);
+                         s->info.context_id);
         return -EINVAL;
     }
 
@@ -380,14 +371,14 @@ static int nbd_parse_blockstatus_payload(NBDClientSession *client,
      * up to the full block and change the status to fully-allocated
      * (always a safe status, even if it loses information).
      */
-    if (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
-                                                   client->info.min_block)) {
+    if (s->info.min_block && !QEMU_IS_ALIGNED(extent->length,
+                                                   s->info.min_block)) {
         trace_nbd_parse_blockstatus_compliance("extent length is unaligned");
-        if (extent->length > client->info.min_block) {
+        if (extent->length > s->info.min_block) {
             extent->length = QEMU_ALIGN_DOWN(extent->length,
-                                             client->info.min_block);
+                                             s->info.min_block);
         } else {
-            extent->length = client->info.min_block;
+            extent->length = s->info.min_block;
             extent->flags = 0;
         }
     }
@@ -453,7 +444,7 @@ static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk,
     return 0;
 }
 
-static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
+static int nbd_co_receive_offset_data_payload(BDRVNBDState *s,
                                               uint64_t orig_offset,
                                               QEMUIOVector *qiov, Error **errp)
 {
@@ -498,7 +489,7 @@ static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
 
 #define NBD_MAX_MALLOC_PAYLOAD 1000
 static coroutine_fn int nbd_co_receive_structured_payload(
-        NBDClientSession *s, void **payload, Error **errp)
+        BDRVNBDState *s, void **payload, Error **errp)
 {
     int ret;
     uint32_t len;
@@ -548,7 +539,7 @@ static coroutine_fn int nbd_co_receive_structured_payload(
  * corresponding to the server's error reply), and errp is unchanged.
  */
 static coroutine_fn int nbd_co_do_receive_one_chunk(
-        NBDClientSession *s, uint64_t handle, bool only_structured,
+        BDRVNBDState *s, uint64_t handle, bool only_structured,
         int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp)
 {
     int ret;
@@ -641,7 +632,7 @@ static coroutine_fn int nbd_co_do_receive_one_chunk(
  * Return value is a fatal error code or normal nbd reply error code
  */
 static coroutine_fn int nbd_co_receive_one_chunk(
-        NBDClientSession *s, uint64_t handle, bool only_structured,
+        BDRVNBDState *s, uint64_t handle, bool only_structured,
         int *request_ret, QEMUIOVector *qiov, NBDReply *reply, void **payload,
         Error **errp)
 {
@@ -709,7 +700,7 @@ static void nbd_iter_request_error(NBDReplyChunkIter *iter, int ret)
  * nbd_reply_chunk_iter_receive
  * The pointer stored in @payload requires g_free() to free it.
  */
-static bool nbd_reply_chunk_iter_receive(NBDClientSession *s,
+static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s,
                                          NBDReplyChunkIter *iter,
                                          uint64_t handle,
                                          QEMUIOVector *qiov, NBDReply *reply,
@@ -776,7 +767,7 @@ break_loop:
     return false;
 }
 
-static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
+static int nbd_co_receive_return_code(BDRVNBDState *s, uint64_t handle,
                                       int *request_ret, Error **errp)
 {
     NBDReplyChunkIter iter;
@@ -790,7 +781,7 @@ static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
     return iter.ret;
 }
 
-static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
+static int nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t handle,
                                         uint64_t offset, QEMUIOVector *qiov,
                                         int *request_ret, Error **errp)
 {
@@ -842,7 +833,7 @@ static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
     return iter.ret;
 }
 
-static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
+static int nbd_co_receive_blockstatus_reply(BDRVNBDState *s,
                                             uint64_t handle, uint64_t length,
                                             NBDExtent *extent,
                                             int *request_ret, Error **errp)
@@ -907,7 +898,7 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
 {
     int ret, request_ret;
     Error *local_err = NULL;
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
     assert(request->type != NBD_CMD_READ);
     if (write_qiov) {
@@ -921,7 +912,7 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
         return ret;
     }
 
-    ret = nbd_co_receive_return_code(client, request->handle,
+    ret = nbd_co_receive_return_code(s, request->handle,
                                      &request_ret, &local_err);
     if (local_err) {
         trace_nbd_co_request_fail(request->from, request->len, request->handle,
@@ -938,7 +929,7 @@ static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
 {
     int ret, request_ret;
     Error *local_err = NULL;
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = {
         .type = NBD_CMD_READ,
         .from = offset,
@@ -957,13 +948,13 @@ static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
      * advertised size because the block layer rounded size up, then
      * truncate the request to the server and tail-pad with zero.
      */
-    if (offset >= client->info.size) {
+    if (offset >= s->info.size) {
         assert(bytes < BDRV_SECTOR_SIZE);
         qemu_iovec_memset(qiov, 0, 0, bytes);
         return 0;
     }
-    if (offset + bytes > client->info.size) {
-        uint64_t slop = offset + bytes - client->info.size;
+    if (offset + bytes > s->info.size) {
+        uint64_t slop = offset + bytes - s->info.size;
 
         assert(slop < BDRV_SECTOR_SIZE);
         qemu_iovec_memset(qiov, bytes - slop, 0, slop);
@@ -975,7 +966,7 @@ static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
         return ret;
     }
 
-    ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
+    ret = nbd_co_receive_cmdread_reply(s, request.handle, offset, qiov,
                                        &request_ret, &local_err);
     if (local_err) {
         trace_nbd_co_request_fail(request.from, request.len, request.handle,
@@ -990,16 +981,16 @@ static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
 static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                  uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = {
         .type = NBD_CMD_WRITE,
         .from = offset,
         .len = bytes,
     };
 
-    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
+    assert(!(s->info.flags & NBD_FLAG_READ_ONLY));
     if (flags & BDRV_REQ_FUA) {
-        assert(client->info.flags & NBD_FLAG_SEND_FUA);
+        assert(s->info.flags & NBD_FLAG_SEND_FUA);
         request.flags |= NBD_CMD_FLAG_FUA;
     }
 
@@ -1014,20 +1005,20 @@ static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
 static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                        int bytes, BdrvRequestFlags flags)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = {
         .type = NBD_CMD_WRITE_ZEROES,
         .from = offset,
         .len = bytes,
     };
 
-    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
-    if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
+    assert(!(s->info.flags & NBD_FLAG_READ_ONLY));
+    if (!(s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
         return -ENOTSUP;
     }
 
     if (flags & BDRV_REQ_FUA) {
-        assert(client->info.flags & NBD_FLAG_SEND_FUA);
+        assert(s->info.flags & NBD_FLAG_SEND_FUA);
         request.flags |= NBD_CMD_FLAG_FUA;
     }
     if (!(flags & BDRV_REQ_MAY_UNMAP)) {
@@ -1042,10 +1033,10 @@ static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
 
 static int nbd_client_co_flush(BlockDriverState *bs)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = { .type = NBD_CMD_FLUSH };
 
-    if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
+    if (!(s->info.flags & NBD_FLAG_SEND_FLUSH)) {
         return 0;
     }
 
@@ -1058,15 +1049,15 @@ static int nbd_client_co_flush(BlockDriverState *bs)
 static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset,
                                   int bytes)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = {
         .type = NBD_CMD_TRIM,
         .from = offset,
         .len = bytes,
     };
 
-    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
-    if (!(client->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
+    assert(!(s->info.flags & NBD_FLAG_READ_ONLY));
+    if (!(s->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
         return 0;
     }
 
@@ -1079,7 +1070,7 @@ static int coroutine_fn nbd_client_co_block_status(
 {
     int ret, request_ret;
     NBDExtent extent = { 0 };
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     Error *local_err = NULL;
 
     NBDRequest request = {
@@ -1087,12 +1078,12 @@ static int coroutine_fn nbd_client_co_block_status(
         .from = offset,
         .len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
                                                 bs->bl.request_alignment),
-                                client->info.max_block),
-                   MIN(bytes, client->info.size - offset)),
+                                s->info.max_block),
+                   MIN(bytes, s->info.size - offset)),
         .flags = NBD_CMD_FLAG_REQ_ONE,
     };
 
-    if (!client->info.base_allocation) {
+    if (!s->info.base_allocation) {
         *pnum = bytes;
         *map = offset;
         *file = bs;
@@ -1106,22 +1097,22 @@ static int coroutine_fn nbd_client_co_block_status(
      * up, we truncated the request to the server (above), or are
      * called on just the hole.
      */
-    if (offset >= client->info.size) {
+    if (offset >= s->info.size) {
         *pnum = bytes;
         assert(bytes < BDRV_SECTOR_SIZE);
         /* Intentionally don't report offset_valid for the hole */
         return BDRV_BLOCK_ZERO;
     }
 
-    if (client->info.min_block) {
-        assert(QEMU_IS_ALIGNED(request.len, client->info.min_block));
+    if (s->info.min_block) {
+        assert(QEMU_IS_ALIGNED(request.len, s->info.min_block));
     }
     ret = nbd_co_send_request(bs, &request, NULL);
     if (ret < 0) {
         return ret;
     }
 
-    ret = nbd_co_receive_blockstatus_reply(client, request.handle, bytes,
+    ret = nbd_co_receive_blockstatus_reply(s, request.handle, bytes,
                                            &extent, &request_ret, &local_err);
     if (local_err) {
         trace_nbd_co_request_fail(request.from, request.len, request.handle,
@@ -1145,12 +1136,12 @@ static int coroutine_fn nbd_client_co_block_status(
 
 static void nbd_client_close(BlockDriverState *bs)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = { .type = NBD_CMD_DISC };
 
-    assert(client->ioc);
+    assert(s->ioc);
 
-    nbd_send_request(client->ioc, &request);
+    nbd_send_request(s->ioc, &request);
 
     nbd_teardown_connection(bs);
 }
@@ -1184,7 +1175,7 @@ static int nbd_client_connect(BlockDriverState *bs,
                               const char *x_dirty_bitmap,
                               Error **errp)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     int ret;
 
     /*
@@ -1201,44 +1192,44 @@ static int nbd_client_connect(BlockDriverState *bs,
     trace_nbd_client_connect(export);
     qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
 
-    client->info.request_sizes = true;
-    client->info.structured_reply = true;
-    client->info.base_allocation = true;
-    client->info.x_dirty_bitmap = g_strdup(x_dirty_bitmap);
-    client->info.name = g_strdup(export ?: "");
+    s->info.request_sizes = true;
+    s->info.structured_reply = true;
+    s->info.base_allocation = true;
+    s->info.x_dirty_bitmap = g_strdup(x_dirty_bitmap);
+    s->info.name = g_strdup(export ?: "");
     ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), tlscreds, hostname,
-                                &client->ioc, &client->info, errp);
-    g_free(client->info.x_dirty_bitmap);
-    g_free(client->info.name);
+                                &s->ioc, &s->info, errp);
+    g_free(s->info.x_dirty_bitmap);
+    g_free(s->info.name);
     if (ret < 0) {
         object_unref(OBJECT(sioc));
         return ret;
     }
-    if (x_dirty_bitmap && !client->info.base_allocation) {
+    if (x_dirty_bitmap && !s->info.base_allocation) {
         error_setg(errp, "requested x-dirty-bitmap %s not found",
                    x_dirty_bitmap);
         ret = -EINVAL;
         goto fail;
     }
-    if (client->info.flags & NBD_FLAG_READ_ONLY) {
+    if (s->info.flags & NBD_FLAG_READ_ONLY) {
         ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp);
         if (ret < 0) {
             goto fail;
         }
     }
-    if (client->info.flags & NBD_FLAG_SEND_FUA) {
+    if (s->info.flags & NBD_FLAG_SEND_FUA) {
         bs->supported_write_flags = BDRV_REQ_FUA;
         bs->supported_zero_flags |= BDRV_REQ_FUA;
     }
-    if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
+    if (s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
         bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
     }
 
-    client->sioc = sioc;
+    s->sioc = sioc;
 
-    if (!client->ioc) {
-        client->ioc = QIO_CHANNEL(sioc);
-        object_ref(OBJECT(client->ioc));
+    if (!s->ioc) {
+        s->ioc = QIO_CHANNEL(sioc);
+        object_ref(OBJECT(s->ioc));
     }
 
     /*
@@ -1246,7 +1237,7 @@ static int nbd_client_connect(BlockDriverState *bs,
      * kick the reply mechanism.
      */
     qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-    client->connection_co = qemu_coroutine_create(nbd_connection_entry, client);
+    s->connection_co = qemu_coroutine_create(nbd_connection_entry, s);
     bdrv_inc_in_flight(bs);
     nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
@@ -1263,7 +1254,7 @@ static int nbd_client_connect(BlockDriverState *bs,
     {
         NBDRequest request = { .type = NBD_CMD_DISC };
 
-        nbd_send_request(client->ioc ?: QIO_CHANNEL(sioc), &request);
+        nbd_send_request(s->ioc ?: QIO_CHANNEL(sioc), &request);
 
         object_unref(OBJECT(sioc));
 
@@ -1279,11 +1270,11 @@ static int nbd_client_init(BlockDriverState *bs,
                            const char *x_dirty_bitmap,
                            Error **errp)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
-    client->bs = bs;
-    qemu_co_mutex_init(&client->send_mutex);
-    qemu_co_queue_init(&client->free_sema);
+    s->bs = bs;
+    qemu_co_mutex_init(&s->send_mutex);
+    qemu_co_queue_init(&s->free_sema);
 
     return nbd_client_connect(bs, saddr, export, tlscreds, hostname,
                               x_dirty_bitmap, errp);
@@ -1665,7 +1656,7 @@ static int nbd_co_flush(BlockDriverState *bs)
 
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    NBDClientSession *s = nbd_get_client_session(bs);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     uint32_t min = s->info.min_block;
     uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block);
 
@@ -1712,7 +1703,7 @@ static int64_t nbd_getlength(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
 
-    return s->client.info.size;
+    return s->info.size;
 }
 
 static void nbd_refresh_filename(BlockDriverState *bs)