From bbc35fc20e6efcb9f177668c04ee05f25a0a2e65 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 30 Sep 2020 17:58:57 +0200
Subject: [PATCH 1/8] nbd: silence maybe-uninitialized warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gcc 10 from Fedora 32 gives me:

Compiling C object libblock.fa.p/nbd_server.c.o
../nbd/server.c: In function ‘nbd_co_client_start’:
../nbd/server.c:625:14: error: ‘namelen’ may be used uninitialized in this function [-Werror=maybe-uninitialized]
  625 |         rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name,
      |              ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  626 |                                      errp);
      |                                      ~~~~~
../nbd/server.c:564:14: note: ‘namelen’ was declared here
  564 |     uint32_t namelen;
      |              ^~~~~~~
cc1: all warnings being treated as errors

As I cannot see how this can happen, let uns silence the warning.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20200930155859.303148-3-borntraeger@de.ibm.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 nbd/server.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nbd/server.c b/nbd/server.c
index f74766add7..f25cffa334 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -556,7 +556,7 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
     NBDExport *exp;
     uint16_t requests;
     uint16_t request;
-    uint32_t namelen;
+    uint32_t namelen = 0;
     bool sendname = false;
     bool blocksize = false;
     uint32_t sizes[3];

From 8c517de24a8a1dcbeb54e7e12b5b0fda42a90ace Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Thu, 3 Sep 2020 22:02:58 +0300
Subject: [PATCH 2/8] block/nbd: fix drain dead-lock because of nbd
 reconnect-delay

We pause reconnect process during drained section. So, if we have some
requests, waiting for reconnect we should cancel them, otherwise they
deadlock the drained section.

How to reproduce:

1. Create an image:
   qemu-img create -f qcow2 xx 100M

2. Start NBD server:
   qemu-nbd xx

3. Start vm with second nbd disk on node2, like this:

  ./build/x86_64-softmmu/qemu-system-x86_64 -nodefaults -drive \
     file=/work/images/cent7.qcow2 -drive \
     driver=nbd,server.type=inet,server.host=192.168.100.5,server.port=10809,reconnect-delay=60 \
     -vnc :0 -m 2G -enable-kvm -vga std

4. Access the vm through vnc (or some other way?), and check that NBD
   drive works:

   dd if=/dev/sdb of=/dev/null bs=1M count=10

   - the command should succeed.

5. Now, kill the nbd server, and run dd in the guest again:

   dd if=/dev/sdb of=/dev/null bs=1M count=10

Now Qemu is trying to reconnect, and dd-generated requests are waiting
for the connection (they will wait up to 60 seconds (see
reconnect-delay option above) and than fail). But suddenly, vm may
totally hang in the deadlock. You may need to increase reconnect-delay
period to catch the dead-lock.

VM doesn't respond because drain dead-lock happens in cpu thread with
global mutex taken. That's not good thing by itself and is not fixed
by this commit (true way is using iothreads). Still this commit fixes
drain dead-lock itself.

Note: probably, we can instead continue to reconnect during drained
section. To achieve this, we may move negotiation to the connect thread
to make it independent of bs aio context. But expanding drained section
doesn't seem good anyway. So, let's now fix the bug the simplest way.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200903190301.367620-2-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 block/nbd.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/block/nbd.c b/block/nbd.c
index 9daf003bea..912ea27be7 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -242,6 +242,11 @@ static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs)
     }
 
     nbd_co_establish_connection_cancel(bs, false);
+
+    if (s->state == NBD_CLIENT_CONNECTING_WAIT) {
+        s->state = NBD_CLIENT_CONNECTING_NOWAIT;
+        qemu_co_queue_restart_all(&s->free_sema);
+    }
 }
 
 static void coroutine_fn nbd_client_co_drain_end(BlockDriverState *bs)

From 8a509afd724671ffc066235e368ba7d81c9a6dd7 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Thu, 3 Sep 2020 22:02:59 +0300
Subject: [PATCH 3/8] block/nbd: correctly use qio_channel_detach_aio_context
 when needed

Don't use nbd_client_detach_aio_context() driver handler where we want
to finalize the connection. We should directly use
qio_channel_detach_aio_context() in such cases. Driver handler may (and
will) contain another things, unrelated to the qio channel.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200903190301.367620-3-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 block/nbd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 912ea27be7..a495ad7ddf 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -549,7 +549,7 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
 
     /* Finalize previous connection if any */
     if (s->ioc) {
-        nbd_client_detach_aio_context(s->bs);
+        qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
         object_unref(OBJECT(s->sioc));
         s->sioc = NULL;
         object_unref(OBJECT(s->ioc));
@@ -707,7 +707,7 @@ static coroutine_fn void nbd_connection_entry(void *opaque)
 
     s->connection_co = NULL;
     if (s->ioc) {
-        nbd_client_detach_aio_context(s->bs);
+        qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
         object_unref(OBJECT(s->sioc));
         s->sioc = NULL;
         object_unref(OBJECT(s->ioc));

From 46f56631b5d18ae744782dbfe6fd17c3ebe15f7a Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Thu, 3 Sep 2020 22:03:00 +0300
Subject: [PATCH 4/8] block/nbd: fix reconnect-delay

reconnect-delay has a design flaw: we handle it in the same loop where
we do connection attempt. So, reconnect-delay may be exceeded by
unpredictable time of connection attempt.

Let's instead use separate timer.

How to reproduce the bug:

1. Create an image on node1:
   qemu-img create -f qcow2 xx 100M

2. Start NBD server on node1:
   qemu-nbd xx

3. On node2 start qemu-io:

./build/qemu-io --image-opts \
driver=nbd,server.type=inet,server.host=192.168.100.5,server.port=10809,reconnect-delay=15

4. Type 'read 0 512' in qemu-io interface to check that connection
   works

Be careful: you should make steps 5-7 in a short time, less than 15
seconds.

5. Kill nbd server on node1

6. Run 'read 0 512' in qemu-io interface again, to be sure that nbd
client goes to reconnect loop.

7. On node1 run the following command

   sudo iptables -A INPUT -p tcp --dport 10809 -j DROP

This will make the connect() call of qemu-io at node2 take a long time.

And you'll see that read command in qemu-io will hang for a long time,
more than 15 seconds specified by reconnect-delay parameter. It's the
bug.

8. Don't forget to drop iptables rule on node1:

   sudo iptables -D INPUT -p tcp --dport 10809 -j DROP

Important note: Step [5] is necessary to reproduce _this_ bug. If we
miss step [5], the read command (step 6) will hang for a long time and
this commit doesn't help, because there will be not long connect() to
unreachable host, but long sendmsg() to unreachable host, which should
be fixed by enabling and adjusting keep-alive on the socket, which is a
thing for further patch set.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200903190301.367620-4-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 block/nbd.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 50 insertions(+), 9 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index a495ad7ddf..caae0e6d31 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -122,6 +122,8 @@ typedef struct BDRVNBDState {
     Error *connect_err;
     bool wait_in_flight;
 
+    QEMUTimer *reconnect_delay_timer;
+
     NBDClientRequest requests[MAX_NBD_REQUESTS];
     NBDReply reply;
     BlockDriverState *bs;
@@ -188,10 +190,49 @@ static void nbd_recv_coroutines_wake_all(BDRVNBDState *s)
     }
 }
 
+static void reconnect_delay_timer_del(BDRVNBDState *s)
+{
+    if (s->reconnect_delay_timer) {
+        timer_del(s->reconnect_delay_timer);
+        timer_free(s->reconnect_delay_timer);
+        s->reconnect_delay_timer = NULL;
+    }
+}
+
+static void reconnect_delay_timer_cb(void *opaque)
+{
+    BDRVNBDState *s = opaque;
+
+    if (s->state == NBD_CLIENT_CONNECTING_WAIT) {
+        s->state = NBD_CLIENT_CONNECTING_NOWAIT;
+        while (qemu_co_enter_next(&s->free_sema, NULL)) {
+            /* Resume all queued requests */
+        }
+    }
+
+    reconnect_delay_timer_del(s);
+}
+
+static void reconnect_delay_timer_init(BDRVNBDState *s, uint64_t expire_time_ns)
+{
+    if (s->state != NBD_CLIENT_CONNECTING_WAIT) {
+        return;
+    }
+
+    assert(!s->reconnect_delay_timer);
+    s->reconnect_delay_timer = aio_timer_new(bdrv_get_aio_context(s->bs),
+                                             QEMU_CLOCK_REALTIME,
+                                             SCALE_NS,
+                                             reconnect_delay_timer_cb, s);
+    timer_mod(s->reconnect_delay_timer, expire_time_ns);
+}
+
 static void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
+    /* Timer is deleted in nbd_client_co_drain_begin() */
+    assert(!s->reconnect_delay_timer);
     qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
 }
 
@@ -243,6 +284,8 @@ static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs)
 
     nbd_co_establish_connection_cancel(bs, false);
 
+    reconnect_delay_timer_del(s);
+
     if (s->state == NBD_CLIENT_CONNECTING_WAIT) {
         s->state = NBD_CLIENT_CONNECTING_NOWAIT;
         qemu_co_queue_restart_all(&s->free_sema);
@@ -593,21 +636,17 @@ out:
 
 static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
 {
-    uint64_t start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    uint64_t delay_ns = s->reconnect_delay * NANOSECONDS_PER_SECOND;
     uint64_t timeout = 1 * NANOSECONDS_PER_SECOND;
     uint64_t max_timeout = 16 * NANOSECONDS_PER_SECOND;
 
+    if (s->state == NBD_CLIENT_CONNECTING_WAIT) {
+        reconnect_delay_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+                                   s->reconnect_delay * NANOSECONDS_PER_SECOND);
+    }
+
     nbd_reconnect_attempt(s);
 
     while (nbd_client_connecting(s)) {
-        if (s->state == NBD_CLIENT_CONNECTING_WAIT &&
-            qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time_ns > delay_ns)
-        {
-            s->state = NBD_CLIENT_CONNECTING_NOWAIT;
-            qemu_co_queue_restart_all(&s->free_sema);
-        }
-
         if (s->drained) {
             bdrv_dec_in_flight(s->bs);
             s->wait_drained_end = true;
@@ -629,6 +668,8 @@ static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
 
         nbd_reconnect_attempt(s);
     }
+
+    reconnect_delay_timer_del(s);
 }
 
 static coroutine_fn void nbd_connection_entry(void *opaque)

From 99d72dba1c96c3a498d935a54081e226b262641a Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Thu, 3 Sep 2020 22:03:01 +0300
Subject: [PATCH 5/8] block/nbd: nbd_co_reconnect_loop(): don't connect if
 drained

In a recent commit 12c75e20a269ac we've improved
nbd_co_reconnect_loop() to not make drain wait for additional sleep.
Similarly, we shouldn't try to connect, if previous sleep was
interrupted by drain begin, otherwise drain_begin will have to wait for
the whole connection attempt.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200903190301.367620-5-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 block/nbd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/nbd.c b/block/nbd.c
index caae0e6d31..4548046cd7 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -661,6 +661,9 @@ static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
         } else {
             qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout,
                                       &s->connection_co_sleep_ns_state);
+            if (s->drained) {
+                continue;
+            }
             if (timeout < max_timeout) {
                 timeout *= 2;
             }

From 029a88c9a7e3210ba565c081471bd44ba8d5e397 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 30 Sep 2020 07:11:01 -0500
Subject: [PATCH 6/8] qemu-nbd: Honor SIGINT and SIGHUP

Honoring just SIGTERM on Linux is too weak; we also want to handle
other common signals, and do so even on BSD.  Why?  Because at least
'qemu-nbd -B bitmap' needs a chance to clean up the in-use bit on
bitmaps when the server is shut down via a signal.

See also: http://bugzilla.redhat.com/1883608

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200930121105.667049-2-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
[eblake: apply comment tweak suggested by Vladimir; fix ifdef around
termsig_handler]
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 qemu-nbd.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index bacb69b089..bc644a0670 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -154,13 +154,13 @@ QEMU_COPYRIGHT "\n"
     , name);
 }
 
-#if HAVE_NBD_DEVICE
+#ifdef CONFIG_POSIX
 static void termsig_handler(int signum)
 {
     qatomic_cmpxchg(&state, RUNNING, TERMINATE);
     qemu_notify_event();
 }
-#endif /* HAVE_NBD_DEVICE */
+#endif /* CONFIG_POSIX */
 
 static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
                                 const char *hostname)
@@ -581,17 +581,18 @@ int main(int argc, char **argv)
     const char *pid_file_name = NULL;
     BlockExportOptions *export_opts;
 
-#if HAVE_NBD_DEVICE
-    /* The client thread uses SIGTERM to interrupt the server.  A signal
-     * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
+#ifdef CONFIG_POSIX
+    /*
+     * Exit gracefully on various signals, which includes SIGTERM used
+     * by 'qemu-nbd -v -c'.
      */
     struct sigaction sa_sigterm;
     memset(&sa_sigterm, 0, sizeof(sa_sigterm));
     sa_sigterm.sa_handler = termsig_handler;
     sigaction(SIGTERM, &sa_sigterm, NULL);
-#endif /* HAVE_NBD_DEVICE */
+    sigaction(SIGINT, &sa_sigterm, NULL);
+    sigaction(SIGHUP, &sa_sigterm, NULL);
 
-#ifdef CONFIG_POSIX
     signal(SIGPIPE, SIG_IGN);
 #endif
 

From d1e2c3e7bd22a99660b0c254fc05c020d0239ca0 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 30 Sep 2020 07:11:02 -0500
Subject: [PATCH 7/8] nbd/server: Reject embedded NUL in NBD strings

The NBD spec is clear that any string sent from the client must not
contain embedded NUL characters.  If the client passes "a\0", we
should reject that option request rather than act on "a".

Testing this is not possible with a compliant client, but I was able
to use gdb to coerce libnbd into temporarily behaving as such a
client.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200930121105.667049-3-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 nbd/server.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index f25cffa334..50f95abe31 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -301,10 +301,11 @@ nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
 }
 
 /* Read size bytes from the unparsed payload of the current option.
+ * If @check_nul, require that no NUL bytes appear in buffer.
  * Return -errno on I/O error, 0 if option was completely handled by
  * sending a reply about inconsistent lengths, or 1 on success. */
 static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
-                        Error **errp)
+                        bool check_nul, Error **errp)
 {
     if (size > client->optlen) {
         return nbd_opt_invalid(client, errp,
@@ -312,7 +313,16 @@ static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
                                nbd_opt_lookup(client->opt));
     }
     client->optlen -= size;
-    return qio_channel_read_all(client->ioc, buffer, size, errp) < 0 ? -EIO : 1;
+    if (qio_channel_read_all(client->ioc, buffer, size, errp) < 0) {
+        return -EIO;
+    }
+
+    if (check_nul && strnlen(buffer, size) != size) {
+        return nbd_opt_invalid(client, errp,
+                               "Unexpected embedded NUL in option %s",
+                               nbd_opt_lookup(client->opt));
+    }
+    return 1;
 }
 
 /* Drop size bytes from the unparsed payload of the current option.
@@ -349,7 +359,7 @@ static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
     g_autofree char *local_name = NULL;
 
     *name = NULL;
-    ret = nbd_opt_read(client, &len, sizeof(len), errp);
+    ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
     if (ret <= 0) {
         return ret;
     }
@@ -361,7 +371,7 @@ static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
     }
 
     local_name = g_malloc(len + 1);
-    ret = nbd_opt_read(client, local_name, len, errp);
+    ret = nbd_opt_read(client, local_name, len, true, errp);
     if (ret <= 0) {
         return ret;
     }
@@ -576,14 +586,14 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
     }
     trace_nbd_negotiate_handle_export_name_request(name);
 
-    rc = nbd_opt_read(client, &requests, sizeof(requests), errp);
+    rc = nbd_opt_read(client, &requests, sizeof(requests), false, errp);
     if (rc <= 0) {
         return rc;
     }
     requests = be16_to_cpu(requests);
     trace_nbd_negotiate_handle_info_requests(requests);
     while (requests--) {
-        rc = nbd_opt_read(client, &request, sizeof(request), errp);
+        rc = nbd_opt_read(client, &request, sizeof(request), false, errp);
         if (rc <= 0) {
             return rc;
         }
@@ -806,7 +816,7 @@ static int nbd_meta_pattern(NBDClient *client, const char *pattern, bool *match,
     assert(len);
 
     query = g_malloc(len);
-    ret = nbd_opt_read(client, query, len, errp);
+    ret = nbd_opt_read(client, query, len, true, errp);
     if (ret <= 0) {
         g_free(query);
         return ret;
@@ -943,7 +953,7 @@ static int nbd_negotiate_meta_query(NBDClient *client,
     char ns[5];
     uint32_t len;
 
-    ret = nbd_opt_read(client, &len, sizeof(len), errp);
+    ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
     if (ret <= 0) {
         return ret;
     }
@@ -959,7 +969,7 @@ static int nbd_negotiate_meta_query(NBDClient *client,
     }
 
     len -= ns_len;
-    ret = nbd_opt_read(client, ns, ns_len, errp);
+    ret = nbd_opt_read(client, ns, ns_len, true, errp);
     if (ret <= 0) {
         return ret;
     }
@@ -1016,7 +1026,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
                             "export '%s' not present", sane_name);
     }
 
-    ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp);
+    ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
     if (ret <= 0) {
         return ret;
     }

From ebd57062a1957307a175a810441af87259d7dbe9 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 30 Sep 2020 07:11:03 -0500
Subject: [PATCH 8/8] nbd: Simplify meta-context parsing

We had a premature optimization of trying to read as little from the
wire as possible while handling NBD_OPT_SET_META_CONTEXT in phases.
But in reality, we HAVE to read the entire string from the client
before we can get to the next command, and it is easier to just read
it all at once than it is to read it in pieces.  And once we do that,
several functions end up no longer performing I/O, so they can drop
length and errp parameters, and just return a bool instead of
modifying through a pointer.

Our iotests still pass; I also checked that libnbd's testsuite (which
covers more corner cases of odd meta context requests) still passes.
There are cases where the sequence of trace messages produced differs
(for example, when no bitmap is exported, a query for "qemu:" now
produces two trace lines instead of one), but trace points are for
debug and have no effect on what the client sees.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200930121105.667049-4-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
[eblake: enhance commit message]
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 nbd/server.c | 189 ++++++++++++++++++---------------------------------
 1 file changed, 68 insertions(+), 121 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 50f95abe31..e75c825879 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2016-2018 Red Hat, Inc.
+ *  Copyright (C) 2016-2020 Red Hat, Inc.
  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
  *
  *  Network Block Device Server Side
@@ -797,135 +797,95 @@ static int nbd_negotiate_send_meta_context(NBDClient *client,
     return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
 }
 
-/* Read strlen(@pattern) bytes, and set @match to true if they match @pattern.
- * @match is never set to false.
- *
- * Return -errno on I/O error, 0 if option was completely handled by
- * sending a reply about inconsistent lengths, or 1 on success.
- *
- * Note: return code = 1 doesn't mean that we've read exactly @pattern.
- * It only means that there are no errors.
+/*
+ * Return true if @query matches @pattern, or if @query is empty when
+ * the @client is performing _LIST_.
  */
-static int nbd_meta_pattern(NBDClient *client, const char *pattern, bool *match,
-                            Error **errp)
+static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern,
+                                      const char *query)
 {
-    int ret;
-    char *query;
-    size_t len = strlen(pattern);
-
-    assert(len);
-
-    query = g_malloc(len);
-    ret = nbd_opt_read(client, query, len, true, errp);
-    if (ret <= 0) {
-        g_free(query);
-        return ret;
+    if (!*query) {
+        trace_nbd_negotiate_meta_query_parse("empty");
+        return client->opt == NBD_OPT_LIST_META_CONTEXT;
     }
-
-    if (strncmp(query, pattern, len) == 0) {
+    if (strcmp(query, pattern) == 0) {
         trace_nbd_negotiate_meta_query_parse(pattern);
-        *match = true;
-    } else {
-        trace_nbd_negotiate_meta_query_skip("pattern not matched");
+        return true;
     }
-    g_free(query);
-
-    return 1;
+    trace_nbd_negotiate_meta_query_skip("pattern not matched");
+    return false;
 }
 
 /*
- * Read @len bytes, and set @match to true if they match @pattern, or if @len
- * is 0 and the client is performing _LIST_. @match is never set to false.
- *
- * Return -errno on I/O error, 0 if option was completely handled by
- * sending a reply about inconsistent lengths, or 1 on success.
- *
- * Note: return code = 1 doesn't mean that we've read exactly @pattern.
- * It only means that there are no errors.
+ * Return true and adjust @str in place if it begins with @prefix.
  */
-static int nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern,
-                                     uint32_t len, bool *match, Error **errp)
+static bool nbd_strshift(const char **str, const char *prefix)
 {
-    if (len == 0) {
-        if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
-            *match = true;
-        }
-        trace_nbd_negotiate_meta_query_parse("empty");
-        return 1;
-    }
+    size_t len = strlen(prefix);
 
-    if (len != strlen(pattern)) {
-        trace_nbd_negotiate_meta_query_skip("different lengths");
-        return nbd_opt_skip(client, len, errp);
+    if (strncmp(*str, prefix, len) == 0) {
+        *str += len;
+        return true;
     }
-
-    return nbd_meta_pattern(client, pattern, match, errp);
+    return false;
 }
 
 /* nbd_meta_base_query
  *
  * Handle queries to 'base' namespace. For now, only the base:allocation
- * context is available.  'len' is the amount of text remaining to be read from
- * the current name, after the 'base:' portion has been stripped.
- *
- * Return -errno on I/O error, 0 if option was completely handled by
- * sending a reply about inconsistent lengths, or 1 on success.
+ * context is available.  Return true if @query has been handled.
  */
-static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
-                               uint32_t len, Error **errp)
+static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
+                                const char *query)
 {
-    return nbd_meta_empty_or_pattern(client, "allocation", len,
-                                     &meta->base_allocation, errp);
+    if (!nbd_strshift(&query, "base:")) {
+        return false;
+    }
+    trace_nbd_negotiate_meta_query_parse("base:");
+
+    if (nbd_meta_empty_or_pattern(client, "allocation", query)) {
+        meta->base_allocation = true;
+    }
+    return true;
 }
 
-/* nbd_meta_bitmap_query
+/* nbd_meta_qemu_query
  *
- * Handle query to 'qemu:' namespace.
- * @len is the amount of text remaining to be read from the current name, after
- * the 'qemu:' portion has been stripped.
- *
- * Return -errno on I/O error, 0 if option was completely handled by
- * sending a reply about inconsistent lengths, or 1 on success. */
-static int nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
-                               uint32_t len, Error **errp)
+ * Handle queries to 'qemu' namespace. For now, only the qemu:dirty-bitmap:
+ * context is available.  Return true if @query has been handled.
+ */
+static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
+                                const char *query)
 {
-    bool dirty_bitmap = false;
-    size_t dirty_bitmap_len = strlen("dirty-bitmap:");
-    int ret;
-
-    if (!meta->exp->export_bitmap) {
-        trace_nbd_negotiate_meta_query_skip("no dirty-bitmap exported");
-        return nbd_opt_skip(client, len, errp);
+    if (!nbd_strshift(&query, "qemu:")) {
+        return false;
     }
+    trace_nbd_negotiate_meta_query_parse("qemu:");
 
-    if (len == 0) {
+    if (!*query) {
         if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
-            meta->bitmap = true;
+            meta->bitmap = !!meta->exp->export_bitmap;
         }
         trace_nbd_negotiate_meta_query_parse("empty");
-        return 1;
+        return true;
     }
 
-    if (len < dirty_bitmap_len) {
-        trace_nbd_negotiate_meta_query_skip("not dirty-bitmap:");
-        return nbd_opt_skip(client, len, errp);
+    if (nbd_strshift(&query, "dirty-bitmap:")) {
+        trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
+        if (!meta->exp->export_bitmap) {
+            trace_nbd_negotiate_meta_query_skip("no dirty-bitmap exported");
+            return true;
+        }
+        if (nbd_meta_empty_or_pattern(client,
+                                      meta->exp->export_bitmap_context +
+                                      strlen("qemu:dirty-bitmap:"), query)) {
+            meta->bitmap = true;
+        }
+        return true;
     }
 
-    len -= dirty_bitmap_len;
-    ret = nbd_meta_pattern(client, "dirty-bitmap:", &dirty_bitmap, errp);
-    if (ret <= 0) {
-        return ret;
-    }
-    if (!dirty_bitmap) {
-        trace_nbd_negotiate_meta_query_skip("not dirty-bitmap:");
-        return nbd_opt_skip(client, len, errp);
-    }
-
-    trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
-
-    return nbd_meta_empty_or_pattern(
-            client, meta->exp->export_bitmap_context +
-            strlen("qemu:dirty_bitmap:"), len, &meta->bitmap, errp);
+    trace_nbd_negotiate_meta_query_skip("not dirty-bitmap");
+    return true;
 }
 
 /* nbd_negotiate_meta_query
@@ -935,22 +895,13 @@ static int nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
  *
  * The only supported namespaces are 'base' and 'qemu'.
  *
- * The function aims not wasting time and memory to read long unknown namespace
- * names.
- *
  * Return -errno on I/O error, 0 if option was completely handled by
  * sending a reply about inconsistent lengths, or 1 on success. */
 static int nbd_negotiate_meta_query(NBDClient *client,
                                     NBDExportMetaContexts *meta, Error **errp)
 {
-    /*
-     * Both 'qemu' and 'base' namespaces have length = 5 including a
-     * colon. If another length namespace is later introduced, this
-     * should certainly be refactored.
-     */
     int ret;
-    size_t ns_len = 5;
-    char ns[5];
+    g_autofree char *query = NULL;
     uint32_t len;
 
     ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
@@ -963,27 +914,23 @@ static int nbd_negotiate_meta_query(NBDClient *client,
         trace_nbd_negotiate_meta_query_skip("length too long");
         return nbd_opt_skip(client, len, errp);
     }
-    if (len < ns_len) {
-        trace_nbd_negotiate_meta_query_skip("length too short");
-        return nbd_opt_skip(client, len, errp);
-    }
 
-    len -= ns_len;
-    ret = nbd_opt_read(client, ns, ns_len, true, errp);
+    query = g_malloc(len + 1);
+    ret = nbd_opt_read(client, query, len, true, errp);
     if (ret <= 0) {
         return ret;
     }
+    query[len] = '\0';
 
-    if (!strncmp(ns, "base:", ns_len)) {
-        trace_nbd_negotiate_meta_query_parse("base:");
-        return nbd_meta_base_query(client, meta, len, errp);
-    } else if (!strncmp(ns, "qemu:", ns_len)) {
-        trace_nbd_negotiate_meta_query_parse("qemu:");
-        return nbd_meta_qemu_query(client, meta, len, errp);
+    if (nbd_meta_base_query(client, meta, query)) {
+        return 1;
+    }
+    if (nbd_meta_qemu_query(client, meta, query)) {
+        return 1;
     }
 
     trace_nbd_negotiate_meta_query_skip("unknown namespace");
-    return nbd_opt_skip(client, len, errp);
+    return 1;
 }
 
 /* nbd_negotiate_meta_queries