From b4af1df08f9433d858ec01aff4ec9f06cb28128b Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 27 Oct 2020 17:44:15 +0100
Subject: [PATCH 01/12] iotests/291: Filter irrelevant parts of img-info

We need to let _img_info emit the format-specific information so we get
the list of bitmaps we want, but we do not need anything but the
bitmaps.  So filter out everything that is irrelevant to us.  (Ideally,
this would be a generalized function in common.filters that takes a list
of things to keep, but that would require implementing an anti-bitmap
filter, which would be hard, and which we do not need here.  That is
why this function is just a local hack.)

This lets 291 pass with qcow2 options like refcount_bits or data_file
again.

Fixes: 14f16bf9474c860ecc127a66a86961942319f7af
       ("qemu-img: Support bitmap --merge into backing image")
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20201027164416.144115-2-mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 tests/qemu-iotests/291     | 14 +++++++++++---
 tests/qemu-iotests/291.out | 20 --------------------
 2 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291
index 4f837b2056..77fa38f93d 100755
--- a/tests/qemu-iotests/291
+++ b/tests/qemu-iotests/291
@@ -42,6 +42,14 @@ _require_command QEMU_NBD
 # compat=0.10 does not support bitmaps
 _unsupported_imgopts 'compat=0.10'
 
+# Filter irrelevant format-specific information from the qemu-img info
+# output (we only want the bitmaps, basically)
+_filter_irrelevant_img_info()
+{
+    grep -v -e 'compat' -e 'compression type' -e 'data file' -e 'extended l2' \
+            -e 'lazy refcounts' -e 'refcount bits'
+}
+
 echo
 echo "=== Initial image setup ==="
 echo
@@ -79,7 +87,7 @@ echo
 
 # Only bitmaps from the active layer are copied
 $QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG.orig" "$TEST_IMG"
-_img_info --format-specific
+_img_info --format-specific | _filter_irrelevant_img_info
 # But we can also merge in bitmaps from other layers.  This test is a bit
 # contrived to cover more code paths, in reality, you could merge directly
 # into b0 without going through tmp
@@ -89,7 +97,7 @@ $QEMU_IMG bitmap --add --merge b0 -b "$TEST_IMG.base" -F $IMGFMT \
 $QEMU_IMG bitmap --merge tmp -f $IMGFMT "$TEST_IMG" b0
 $QEMU_IMG bitmap --remove --image-opts \
     driver=$IMGFMT,file.driver=file,file.filename="$TEST_IMG" tmp
-_img_info --format-specific
+_img_info --format-specific | _filter_irrelevant_img_info
 
 echo
 echo "=== Merge from top layer into backing image ==="
@@ -98,7 +106,7 @@ echo
 $QEMU_IMG rebase -u -F qcow2 -b "$TEST_IMG.base" "$TEST_IMG"
 $QEMU_IMG bitmap --add --merge b2 -b "$TEST_IMG" -F $IMGFMT \
      -f $IMGFMT "$TEST_IMG.base" b3
-_img_info --format-specific --backing-chain
+_img_info --format-specific --backing-chain | _filter_irrelevant_img_info
 
 echo
 echo "=== Check bitmap contents ==="
diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out
index 3990f7aacc..23411c0ff4 100644
--- a/tests/qemu-iotests/291.out
+++ b/tests/qemu-iotests/291.out
@@ -26,9 +26,6 @@ file format: IMGFMT
 virtual size: 10 MiB (10485760 bytes)
 cluster_size: 65536
 Format specific information:
-    compat: 1.1
-    compression type: zlib
-    lazy refcounts: false
     bitmaps:
         [0]:
             flags:
@@ -39,17 +36,12 @@ Format specific information:
                 [0]: auto
             name: b2
             granularity: 65536
-    refcount bits: 16
     corrupt: false
-    extended l2: false
 image: TEST_DIR/t.IMGFMT
 file format: IMGFMT
 virtual size: 10 MiB (10485760 bytes)
 cluster_size: 65536
 Format specific information:
-    compat: 1.1
-    compression type: zlib
-    lazy refcounts: false
     bitmaps:
         [0]:
             flags:
@@ -64,9 +56,7 @@ Format specific information:
             flags:
             name: b0
             granularity: 65536
-    refcount bits: 16
     corrupt: false
-    extended l2: false
 
 === Merge from top layer into backing image ===
 
@@ -77,9 +67,6 @@ cluster_size: 65536
 backing file: TEST_DIR/t.IMGFMT.base
 backing file format: IMGFMT
 Format specific information:
-    compat: 1.1
-    compression type: zlib
-    lazy refcounts: false
     bitmaps:
         [0]:
             flags:
@@ -94,18 +81,13 @@ Format specific information:
             flags:
             name: b0
             granularity: 65536
-    refcount bits: 16
     corrupt: false
-    extended l2: false
 
 image: TEST_DIR/t.IMGFMT.base
 file format: IMGFMT
 virtual size: 10 MiB (10485760 bytes)
 cluster_size: 65536
 Format specific information:
-    compat: 1.1
-    compression type: zlib
-    lazy refcounts: false
     bitmaps:
         [0]:
             flags:
@@ -117,9 +99,7 @@ Format specific information:
                 [0]: auto
             name: b3
             granularity: 65536
-    refcount bits: 16
     corrupt: false
-    extended l2: false
 
 === Check bitmap contents ===
 

From cb7223ac33fa35976d64f31145122965c1ba332d Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 27 Oct 2020 17:44:16 +0100
Subject: [PATCH 02/12] iotests/291: Stop NBD server

nbd_server_start_unix_socket() includes an implicit nbd_server_stop(),
but we still need an explicit one at the end of the test (where no
further nbd_server_start_unix_socket() follows), or qemu-nbd will
linger until the test exits.

This will become important when enabling this test to run on FUSE
exports, because then the export (which is the image used by qemu-nbd)
will go away before qemu-nbd exits, which will lead to qemu-nbd
complaining that it cannot flush the bitmaps in the image.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20201027164416.144115-3-mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 tests/qemu-iotests/291 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291
index 77fa38f93d..b7320bc7ad 100755
--- a/tests/qemu-iotests/291
+++ b/tests/qemu-iotests/291
@@ -128,6 +128,8 @@ nbd_server_start_unix_socket -r -f qcow2 -B b3 "$TEST_IMG"
 $QEMU_IMG map --output=json --image-opts \
     "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b3" | _filter_qemu_img_map
 
+nbd_server_stop
+
 # success, all done
 echo '*** done'
 rm -f $seq.full

From 159f8442382cff6ef6d858dbf02f03f0cc95b4a7 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 27 Oct 2020 00:05:46 -0500
Subject: [PATCH 03/12] block: Simplify QAPI_LIST_ADD

There is no need to rely on the verbosity of the gcc/clang compiler
extension of g_new(typeof(X), 1) when we can instead use the standard
g_malloc(sizeof(X)).  In general, we like g_new over g_malloc for
returning type X rather than void* to let the compiler catch more
potential typing mistakes, but in this particular macro, our other use
of typeof on the same line already ensures we are getting correct
results.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20201027050556.269064-2-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
---
 block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block.c b/block.c
index ee5b28a979..dd551d7ea9 100644
--- a/block.c
+++ b/block.c
@@ -5240,7 +5240,7 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
 }
 
 #define QAPI_LIST_ADD(list, element) do { \
-    typeof(list) _tmp = g_new(typeof(*(list)), 1); \
+    typeof(list) _tmp = g_malloc(sizeof(*(list))); \
     _tmp->value = (element); \
     _tmp->next = (list); \
     (list) = _tmp; \

From 9812e7125b83ba6e4645237150ca5f61bf66197f Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 27 Oct 2020 00:05:47 -0500
Subject: [PATCH 04/12] qapi: Add QAPI_LIST_PREPEND() macro

block.c has a useful macro QAPI_LIST_ADD() for inserting at the front
of any QAPI-generated list; move it from block.c to qapi/util.h so
more places can use it, including one earlier place in block.c, and
rename it to something more obvious (since we also have a lot of
places that append, rather than prepend, to a list).

There are many more places in the codebase that can benefit from using
the macro, but converting them will be left to later patches.

In theory, all QAPI list types are child classes of GenericList; but
in practice, that relationship is not explicitly spelled out in the C
type declarations (rather, it is something that happens implicitly due
to C compatible layouts), and the macro does not actually depend on
the GenericList type.  We considered moving GenericList from visitor.h
into util.h to group related code; however, such a move would be
awkward if we do not also move GenericAlternate.  Unfortunately,
moving GenericAlternate would introduce its own problems of
declaration circularity (qapi-builtin-types.h needs a complete
definition of QEnumLookup from util.h, but GenericAlternate needs a
complete definition of QType from qapi-builtin-types.h).

Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20201027050556.269064-3-eblake@redhat.com>
[eblake: s/ADD/PREPEND/ per suggestion by Markus]
---
 block.c             | 22 ++++++----------------
 include/qapi/util.h | 13 +++++++++++++
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/block.c b/block.c
index dd551d7ea9..56bacc9e9f 100644
--- a/block.c
+++ b/block.c
@@ -5220,7 +5220,7 @@ BlockDriverState *bdrv_find_node(const char *node_name)
 BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
                                            Error **errp)
 {
-    BlockDeviceInfoList *list, *entry;
+    BlockDeviceInfoList *list;
     BlockDriverState *bs;
 
     list = NULL;
@@ -5230,22 +5230,12 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
             qapi_free_BlockDeviceInfoList(list);
             return NULL;
         }
-        entry = g_malloc0(sizeof(*entry));
-        entry->value = info;
-        entry->next = list;
-        list = entry;
+        QAPI_LIST_PREPEND(list, info);
     }
 
     return list;
 }
 
-#define QAPI_LIST_ADD(list, element) do { \
-    typeof(list) _tmp = g_malloc(sizeof(*(list))); \
-    _tmp->value = (element); \
-    _tmp->next = (list); \
-    (list) = _tmp; \
-} while (0)
-
 typedef struct XDbgBlockGraphConstructor {
     XDbgBlockGraph *graph;
     GHashTable *graph_nodes;
@@ -5300,7 +5290,7 @@ static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node,
     n->type = type;
     n->name = g_strdup(name);
 
-    QAPI_LIST_ADD(gr->graph->nodes, n);
+    QAPI_LIST_PREPEND(gr->graph->nodes, n);
 }
 
 static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
@@ -5319,14 +5309,14 @@ static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
         uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm);
 
         if (flag & child->perm) {
-            QAPI_LIST_ADD(edge->perm, qapi_perm);
+            QAPI_LIST_PREPEND(edge->perm, qapi_perm);
         }
         if (flag & child->shared_perm) {
-            QAPI_LIST_ADD(edge->shared_perm, qapi_perm);
+            QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm);
         }
     }
 
-    QAPI_LIST_ADD(gr->graph->edges, edge);
+    QAPI_LIST_PREPEND(gr->graph->edges, edge);
 }
 
 
diff --git a/include/qapi/util.h b/include/qapi/util.h
index a7c3c64148..bc312e90aa 100644
--- a/include/qapi/util.h
+++ b/include/qapi/util.h
@@ -22,4 +22,17 @@ int qapi_enum_parse(const QEnumLookup *lookup, const char *buf,
 
 int parse_qapi_name(const char *name, bool complete);
 
+/*
+ * For any GenericList @list, insert @element at the front.
+ *
+ * Note that this macro evaluates @element exactly once, so it is safe
+ * to have side-effects with that argument.
+ */
+#define QAPI_LIST_PREPEND(list, element) do { \
+    typeof(list) _tmp = g_malloc(sizeof(*(list))); \
+    _tmp->value = (element); \
+    _tmp->next = (list); \
+    (list) = _tmp; \
+} while (0)
+
 #endif

From 8675cbd68be7da35a61e391fbb5b95ac67bc7d72 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 27 Oct 2020 00:05:48 -0500
Subject: [PATCH 05/12] nbd: Utilize QAPI_CLONE for type conversion

Rather than open-coding the translation from the deprecated
NbdServerAddOptions type to the preferred BlockExportOptionsNbd, it's
better to utilize QAPI_CLONE_MEMBERS.  This solves a couple of issues:
first, if we do any more refactoring of the base type (which an
upcoming patch plans to do), we don't have to revisit the open-coding.
Second, our assignment to arg->name is fishy: the generated QAPI code
for qapi_free_NbdServerAddOptions does not visit arg->name if
arg->has_name is false, but if it DID visit it, we would have
introduced a double-free situation when arg is finally freed.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20201027050556.269064-4-eblake@redhat.com>
---
 blockdev-nbd.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 8174023e5c..cee9134b12 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -14,6 +14,8 @@
 #include "sysemu/block-backend.h"
 #include "hw/block/block.h"
 #include "qapi/error.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/qapi-visit-block-export.h"
 #include "qapi/qapi-commands-block-export.h"
 #include "block/nbd.h"
 #include "io/channel-socket.h"
@@ -195,7 +197,8 @@ void qmp_nbd_server_add(NbdServerAddOptions *arg, Error **errp)
      * the device name as a default here for compatibility.
      */
     if (!arg->has_name) {
-        arg->name = arg->device;
+        arg->has_name = true;
+        arg->name = g_strdup(arg->device);
     }
 
     export_opts = g_new(BlockExportOptions, 1);
@@ -205,15 +208,9 @@ void qmp_nbd_server_add(NbdServerAddOptions *arg, Error **errp)
         .node_name              = g_strdup(bdrv_get_node_name(bs)),
         .has_writable           = arg->has_writable,
         .writable               = arg->writable,
-        .u.nbd = {
-            .has_name           = true,
-            .name               = g_strdup(arg->name),
-            .has_description    = arg->has_description,
-            .description        = g_strdup(arg->description),
-            .has_bitmap         = arg->has_bitmap,
-            .bitmap             = g_strdup(arg->bitmap),
-        },
     };
+    QAPI_CLONE_MEMBERS(BlockExportOptionsNbd, &export_opts->u.nbd,
+                       qapi_NbdServerAddOptions_base(arg));
 
     /*
      * nbd-server-add doesn't complain when a read-only device should be

From cbad81cef8cc7b220f04600997ea29d7302bae00 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 27 Oct 2020 00:05:49 -0500
Subject: [PATCH 06/12] nbd: Update qapi to support exporting multiple bitmaps

Since 'block-export-add' is new to 5.2, we can still tweak the
interface; there, allowing 'bitmaps':['str'] is nicer than
'bitmap':'str'.  This wires up the qapi and qemu-nbd changes to permit
passing multiple bitmaps as distinct metadata contexts that the NBD
client may request, but the actual support for more than one will
require a further patch to the server.

Note that there are no changes made to the existing deprecated
'nbd-server-add' command; this required splitting the QAPI type
BlockExportOptionsNbd, which fortunately does not affect QMP
introspection.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20201027050556.269064-5-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
---
 blockdev-nbd.c             |  6 +++++-
 docs/system/deprecated.rst |  3 ++-
 nbd/server.c               | 19 ++++++++++++------
 qapi/block-export.json     | 41 +++++++++++++++++++++++++++-----------
 qemu-nbd.c                 | 18 ++++++++---------
 5 files changed, 58 insertions(+), 29 deletions(-)

diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index cee9134b12..d8443d235b 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -209,8 +209,12 @@ void qmp_nbd_server_add(NbdServerAddOptions *arg, Error **errp)
         .has_writable           = arg->has_writable,
         .writable               = arg->writable,
     };
-    QAPI_CLONE_MEMBERS(BlockExportOptionsNbd, &export_opts->u.nbd,
+    QAPI_CLONE_MEMBERS(BlockExportOptionsNbdBase, &export_opts->u.nbd,
                        qapi_NbdServerAddOptions_base(arg));
+    if (arg->has_bitmap) {
+        export_opts->u.nbd.has_bitmaps = true;
+        QAPI_LIST_PREPEND(export_opts->u.nbd.bitmaps, g_strdup(arg->bitmap));
+    }
 
     /*
      * nbd-server-add doesn't complain when a read-only device should be
diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst
index 0ebce37a19..32a0e620db 100644
--- a/docs/system/deprecated.rst
+++ b/docs/system/deprecated.rst
@@ -257,7 +257,8 @@ the 'wait' field, which is only applicable to sockets in server mode
 ''''''''''''''''''''''''''''''''''''''''''''''''''''''''
 
 Use the more generic commands ``block-export-add`` and ``block-export-del``
-instead.
+instead.  As part of this deprecation, where ``nbd-server-add`` used a
+single ``bitmap``, the new ``block-export-add`` uses a list of ``bitmaps``.
 
 Human Monitor Protocol (HMP) commands
 -------------------------------------
diff --git a/nbd/server.c b/nbd/server.c
index 08b621f70a..8d01662b45 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1474,6 +1474,7 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
     uint64_t perm, shared_perm;
     bool readonly = !exp_args->writable;
     bool shared = !exp_args->writable;
+    strList *bitmaps;
     int ret;
 
     assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);
@@ -1533,12 +1534,18 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
     }
     exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
 
-    if (arg->bitmap) {
+    /* XXX Allow more than one bitmap */
+    if (arg->bitmaps && arg->bitmaps->next) {
+        error_setg(errp, "multiple bitmaps per export not supported yet");
+        return -EOPNOTSUPP;
+    }
+    for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
+        const char *bitmap = bitmaps->value;
         BlockDriverState *bs = blk_bs(blk);
         BdrvDirtyBitmap *bm = NULL;
 
         while (bs) {
-            bm = bdrv_find_dirty_bitmap(bs, arg->bitmap);
+            bm = bdrv_find_dirty_bitmap(bs, bitmap);
             if (bm != NULL) {
                 break;
             }
@@ -1548,7 +1555,7 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
 
         if (bm == NULL) {
             ret = -ENOENT;
-            error_setg(errp, "Bitmap '%s' is not found", arg->bitmap);
+            error_setg(errp, "Bitmap '%s' is not found", bitmap);
             goto fail;
         }
 
@@ -1562,15 +1569,15 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
             ret = -EINVAL;
             error_setg(errp,
                        "Enabled bitmap '%s' incompatible with readonly export",
-                       arg->bitmap);
+                       bitmap);
             goto fail;
         }
 
         bdrv_dirty_bitmap_set_busy(bm, true);
         exp->export_bitmap = bm;
-        assert(strlen(arg->bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
+        assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
         exp->export_bitmap_context = g_strdup_printf("qemu:dirty-bitmap:%s",
-                                                     arg->bitmap);
+                                                     bitmap);
         assert(strlen(exp->export_bitmap_context) < NBD_MAX_STRING_SIZE);
     }
 
diff --git a/qapi/block-export.json b/qapi/block-export.json
index 480c497690..c4125f4d21 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -63,10 +63,10 @@
             '*max-connections': 'uint32' } }
 
 ##
-# @BlockExportOptionsNbd:
+# @BlockExportOptionsNbdBase:
 #
-# An NBD block export (options shared between nbd-server-add and the NBD branch
-# of block-export-add).
+# An NBD block export (common options shared between nbd-server-add and
+# the NBD branch of block-export-add).
 #
 # @name: Export name. If unspecified, the @device parameter is used as the
 #        export name. (Since 2.12)
@@ -74,15 +74,27 @@
 # @description: Free-form description of the export, up to 4096 bytes.
 #               (Since 5.0)
 #
-# @bitmap: Also export the dirty bitmap reachable from @device, so the
-#          NBD client can use NBD_OPT_SET_META_CONTEXT with
-#          "qemu:dirty-bitmap:NAME" to inspect the bitmap. (since 4.0)
-#
 # Since: 5.0
 ##
+{ 'struct': 'BlockExportOptionsNbdBase',
+  'data': { '*name': 'str', '*description': 'str' } }
+
+##
+# @BlockExportOptionsNbd:
+#
+# An NBD block export (distinct options used in the NBD branch of
+# block-export-add).
+#
+# @bitmaps: Also export each of the named dirty bitmaps reachable from
+#           @device, so the NBD client can use NBD_OPT_SET_META_CONTEXT with
+#           the metadata context name "qemu:dirty-bitmap:BITMAP" to inspect
+#           each bitmap.
+#
+# Since: 5.2
+##
 { 'struct': 'BlockExportOptionsNbd',
-  'data': { '*name': 'str', '*description': 'str',
-            '*bitmap': 'str' } }
+  'base': 'BlockExportOptionsNbdBase',
+  'data': { '*bitmaps': ['str'] } }
 
 ##
 # @BlockExportOptionsVhostUserBlk:
@@ -106,19 +118,24 @@
 ##
 # @NbdServerAddOptions:
 #
-# An NBD block export.
+# An NBD block export, per legacy nbd-server-add command.
 #
 # @device: The device name or node name of the node to be exported
 #
 # @writable: Whether clients should be able to write to the device via the
 #            NBD connection (default false).
 #
+# @bitmap: Also export a single dirty bitmap reachable from @device, so the
+#          NBD client can use NBD_OPT_SET_META_CONTEXT with the metadata
+#          context name "qemu:dirty-bitmap:BITMAP" to inspect the bitmap
+#          (since 4.0).
+#
 # Since: 5.0
 ##
 { 'struct': 'NbdServerAddOptions',
-  'base': 'BlockExportOptionsNbd',
+  'base': 'BlockExportOptionsNbdBase',
   'data': { 'device': 'str',
-            '*writable': 'bool' } }
+            '*writable': 'bool', '*bitmap': 'str' } }
 
 ##
 # @nbd-server-add:
diff --git a/qemu-nbd.c b/qemu-nbd.c
index a0701cdf36..74e73a7465 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -574,7 +574,7 @@ int main(int argc, char **argv)
     QDict *options = NULL;
     const char *export_name = NULL; /* defaults to "" later for server mode */
     const char *export_description = NULL;
-    const char *bitmap = NULL;
+    strList *bitmaps = NULL;
     const char *tlscredsid = NULL;
     bool imageOpts = false;
     bool writethrough = true;
@@ -690,7 +690,7 @@ int main(int argc, char **argv)
             flags &= ~BDRV_O_RDWR;
             break;
         case 'B':
-            bitmap = optarg;
+            QAPI_LIST_PREPEND(bitmaps, g_strdup(optarg));
             break;
         case 'k':
             sockpath = optarg;
@@ -786,7 +786,7 @@ int main(int argc, char **argv)
             exit(EXIT_FAILURE);
         }
         if (export_name || export_description || dev_offset ||
-            device || disconnect || fmt || sn_id_or_name || bitmap ||
+            device || disconnect || fmt || sn_id_or_name || bitmaps ||
             seen_aio || seen_discard || seen_cache) {
             error_report("List mode is incompatible with per-device settings");
             exit(EXIT_FAILURE);
@@ -1067,12 +1067,12 @@ int main(int argc, char **argv)
         .has_writable       = true,
         .writable           = !readonly,
         .u.nbd = {
-            .has_name           = true,
-            .name               = g_strdup(export_name),
-            .has_description    = !!export_description,
-            .description        = g_strdup(export_description),
-            .has_bitmap         = !!bitmap,
-            .bitmap             = g_strdup(bitmap),
+            .has_name             = true,
+            .name                 = g_strdup(export_name),
+            .has_description      = !!export_description,
+            .description          = g_strdup(export_description),
+            .has_bitmaps          = !!bitmaps,
+            .bitmaps              = bitmaps,
         },
     };
     blk_exp_add(export_opts, &error_fatal);

From 02e87e3b1c77422b71855102b9cb884e4c7823f7 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 27 Oct 2020 00:05:50 -0500
Subject: [PATCH 07/12] nbd: Simplify qemu bitmap context name

Each dirty bitmap already knows its name; by reducing the scope of the
places where we construct "qemu:dirty-bitmap:NAME" strings, tracking
the name is more localized, and there are fewer per-export fields to
worry about.  This in turn will make it easier for an upcoming patch
to export more than one bitmap at once.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20201027050556.269064-6-eblake@redhat.com>
---
 nbd/server.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 8d01662b45..77fdecdf9d 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -95,7 +95,6 @@ struct NBDExport {
     Notifier eject_notifier;
 
     BdrvDirtyBitmap *export_bitmap;
-    char *export_bitmap_context;
 };
 
 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -871,14 +870,15 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
     }
 
     if (nbd_strshift(&query, "dirty-bitmap:")) {
+        const char *bm_name;
+
         trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
         if (!meta->exp->export_bitmap) {
             trace_nbd_negotiate_meta_query_skip("no dirty-bitmap exported");
             return true;
         }
-        if (nbd_meta_empty_or_pattern(client,
-                                      meta->exp->export_bitmap_context +
-                                      strlen("qemu:dirty-bitmap:"), query)) {
+        bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmap);
+        if (nbd_meta_empty_or_pattern(client, bm_name, query)) {
             meta->bitmap = true;
         }
         return true;
@@ -1004,8 +1004,11 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
     }
 
     if (meta->bitmap) {
-        ret = nbd_negotiate_send_meta_context(client,
-                                              meta->exp->export_bitmap_context,
+        const char *bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmap);
+        g_autofree char *context = g_strdup_printf("qemu:dirty-bitmap:%s",
+                                                   bm_name);
+
+        ret = nbd_negotiate_send_meta_context(client, context,
                                               NBD_META_ID_DIRTY_BITMAP,
                                               errp);
         if (ret < 0) {
@@ -1576,9 +1579,6 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
         bdrv_dirty_bitmap_set_busy(bm, true);
         exp->export_bitmap = bm;
         assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
-        exp->export_bitmap_context = g_strdup_printf("qemu:dirty-bitmap:%s",
-                                                     bitmap);
-        assert(strlen(exp->export_bitmap_context) < NBD_MAX_STRING_SIZE);
     }
 
     blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
@@ -1656,7 +1656,6 @@ static void nbd_export_delete(BlockExport *blk_exp)
 
     if (exp->export_bitmap) {
         bdrv_dirty_bitmap_set_busy(exp->export_bitmap, false);
-        g_free(exp->export_bitmap_context);
     }
 }
 

From 47ec485e8d5dba978126d31b9484d717ac1222ba Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 27 Oct 2020 00:05:51 -0500
Subject: [PATCH 08/12] nbd: Refactor counting of metadata contexts

Rather than open-code the count of negotiated contexts at several
sites, embed it directly into the struct.  This will make it easier
for upcoming commits to support even more simultaneous contexts.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20201027050556.269064-7-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 nbd/server.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 77fdecdf9d..42d494bc96 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -104,8 +104,7 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
  * NBD_OPT_LIST_META_CONTEXT. */
 typedef struct NBDExportMetaContexts {
     NBDExport *exp;
-    bool valid; /* means that negotiation of the option finished without
-                   errors */
+    size_t count; /* number of negotiated contexts */
     bool base_allocation; /* export base:allocation context (block status) */
     bool bitmap; /* export qemu:dirty-bitmap:<export bitmap name> */
 } NBDExportMetaContexts;
@@ -445,7 +444,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
 
 static void nbd_check_meta_export(NBDClient *client)
 {
-    client->export_meta.valid &= client->exp == client->export_meta.exp;
+    if (client->exp != client->export_meta.exp) {
+        client->export_meta.count = 0;
+    }
 }
 
 /* Send a reply to NBD_OPT_EXPORT_NAME.
@@ -945,6 +946,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
     NBDExportMetaContexts local_meta;
     uint32_t nb_queries;
     int i;
+    size_t count = 0;
 
     if (!client->structured_reply) {
         return nbd_opt_invalid(client, errp,
@@ -1001,6 +1003,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
         if (ret < 0) {
             return ret;
         }
+        count++;
     }
 
     if (meta->bitmap) {
@@ -1014,11 +1017,12 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
         if (ret < 0) {
             return ret;
         }
+        count++;
     }
 
     ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
     if (ret == 0) {
-        meta->valid = true;
+        meta->count = count;
     }
 
     return ret;
@@ -2337,18 +2341,16 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
             return nbd_send_generic_reply(client, request->handle, -EINVAL,
                                           "need non-zero length", errp);
         }
-        if (client->export_meta.valid &&
-            (client->export_meta.base_allocation ||
-             client->export_meta.bitmap))
-        {
+        if (client->export_meta.count) {
             bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
+            int contexts_remaining = client->export_meta.count;
 
             if (client->export_meta.base_allocation) {
                 ret = nbd_co_send_block_status(client, request->handle,
                                                blk_bs(exp->common.blk),
                                                request->from,
                                                request->len, dont_fragment,
-                                               !client->export_meta.bitmap,
+                                               !--contexts_remaining,
                                                NBD_META_ID_BASE_ALLOCATION,
                                                errp);
                 if (ret < 0) {
@@ -2360,13 +2362,15 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
                 ret = nbd_co_send_bitmap(client, request->handle,
                                          client->exp->export_bitmap,
                                          request->from, request->len,
-                                         dont_fragment,
-                                         true, NBD_META_ID_DIRTY_BITMAP, errp);
+                                         dont_fragment, !--contexts_remaining,
+                                         NBD_META_ID_DIRTY_BITMAP, errp);
                 if (ret < 0) {
                     return ret;
                 }
             }
 
+            assert(!contexts_remaining);
+
             return 0;
         } else {
             return nbd_send_generic_reply(client, request->handle, -EINVAL,

From 3b1f244c59b7045680e615d50dc444a316abd891 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 27 Oct 2020 00:05:52 -0500
Subject: [PATCH 09/12] nbd: Allow export of multiple bitmaps for one device

With this, 'qemu-nbd -B b0 -B b1 -f qcow2 img.qcow2' can let you sniff
out multiple bitmaps from one server.  qemu-img as client can still
only read one bitmap per client connection, but other NBD clients
(hello libnbd) can now read multiple bitmaps in a single pass.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20201027050556.269064-8-eblake@redhat.com>
---
 nbd/server.c           | 100 ++++++++++++++++++++++++++++-------------
 tests/qemu-iotests/291 |   6 +--
 2 files changed, 72 insertions(+), 34 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 42d494bc96..b6841e4554 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -27,6 +27,7 @@
 #include "qemu/units.h"
 
 #define NBD_META_ID_BASE_ALLOCATION 0
+/* Dirty bitmaps use 'NBD_META_ID_DIRTY_BITMAP + i', so keep this id last. */
 #define NBD_META_ID_DIRTY_BITMAP 1
 
 /*
@@ -94,7 +95,8 @@ struct NBDExport {
     BlockBackend *eject_notifier_blk;
     Notifier eject_notifier;
 
-    BdrvDirtyBitmap *export_bitmap;
+    BdrvDirtyBitmap **export_bitmaps;
+    size_t nr_export_bitmaps;
 };
 
 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -106,7 +108,10 @@ typedef struct NBDExportMetaContexts {
     NBDExport *exp;
     size_t count; /* number of negotiated contexts */
     bool base_allocation; /* export base:allocation context (block status) */
-    bool bitmap; /* export qemu:dirty-bitmap:<export bitmap name> */
+    bool *bitmaps; /*
+                    * export qemu:dirty-bitmap:<export bitmap name>,
+                    * sized by exp->nr_export_bitmaps
+                    */
 } NBDExportMetaContexts;
 
 struct NBDClient {
@@ -857,6 +862,8 @@ static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
 static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
                                 const char *query)
 {
+    size_t i;
+
     if (!nbd_strshift(&query, "qemu:")) {
         return false;
     }
@@ -864,24 +871,33 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
 
     if (!*query) {
         if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
-            meta->bitmap = !!meta->exp->export_bitmap;
+            memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
         }
         trace_nbd_negotiate_meta_query_parse("empty");
         return true;
     }
 
     if (nbd_strshift(&query, "dirty-bitmap:")) {
-        const char *bm_name;
-
         trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
-        if (!meta->exp->export_bitmap) {
-            trace_nbd_negotiate_meta_query_skip("no dirty-bitmap exported");
+        if (!*query) {
+            if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+                memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+            }
+            trace_nbd_negotiate_meta_query_parse("empty");
             return true;
         }
-        bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmap);
-        if (nbd_meta_empty_or_pattern(client, bm_name, query)) {
-            meta->bitmap = true;
+
+        for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
+            const char *bm_name;
+
+            bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
+            if (strcmp(bm_name, query) == 0) {
+                meta->bitmaps[i] = true;
+                trace_nbd_negotiate_meta_query_parse(query);
+                return true;
+            }
         }
+        trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match");
         return true;
     }
 
@@ -943,9 +959,10 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
 {
     int ret;
     g_autofree char *export_name = NULL;
-    NBDExportMetaContexts local_meta;
+    g_autofree bool *bitmaps = NULL;
+    NBDExportMetaContexts local_meta = {0};
     uint32_t nb_queries;
-    int i;
+    size_t i;
     size_t count = 0;
 
     if (!client->structured_reply) {
@@ -960,6 +977,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
         meta = &local_meta;
     }
 
+    g_free(meta->bitmaps);
     memset(meta, 0, sizeof(*meta));
 
     ret = nbd_opt_read_name(client, &export_name, NULL, errp);
@@ -974,6 +992,10 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
         return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
                             "export '%s' not present", sane_name);
     }
+    meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps);
+    if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+        bitmaps = meta->bitmaps;
+    }
 
     ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
     if (ret <= 0) {
@@ -986,7 +1008,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
     if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
         /* enable all known contexts */
         meta->base_allocation = true;
-        meta->bitmap = !!meta->exp->export_bitmap;
+        memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
     } else {
         for (i = 0; i < nb_queries; ++i) {
             ret = nbd_negotiate_meta_query(client, meta, errp);
@@ -1006,13 +1028,19 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
         count++;
     }
 
-    if (meta->bitmap) {
-        const char *bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmap);
-        g_autofree char *context = g_strdup_printf("qemu:dirty-bitmap:%s",
-                                                   bm_name);
+    for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
+        const char *bm_name;
+        g_autofree char *context = NULL;
+
+        if (!meta->bitmaps[i]) {
+            continue;
+        }
+
+        bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
+        context = g_strdup_printf("qemu:dirty-bitmap:%s", bm_name);
 
         ret = nbd_negotiate_send_meta_context(client, context,
-                                              NBD_META_ID_DIRTY_BITMAP,
+                                              NBD_META_ID_DIRTY_BITMAP + i,
                                               errp);
         if (ret < 0) {
             return ret;
@@ -1366,6 +1394,7 @@ void nbd_client_put(NBDClient *client)
             QTAILQ_REMOVE(&client->exp->clients, client, next);
             blk_exp_unref(&client->exp->common);
         }
+        g_free(client->export_meta.bitmaps);
         g_free(client);
     }
 }
@@ -1482,6 +1511,7 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
     bool readonly = !exp_args->writable;
     bool shared = !exp_args->writable;
     strList *bitmaps;
+    size_t i;
     int ret;
 
     assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);
@@ -1541,12 +1571,12 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
     }
     exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
 
-    /* XXX Allow more than one bitmap */
-    if (arg->bitmaps && arg->bitmaps->next) {
-        error_setg(errp, "multiple bitmaps per export not supported yet");
-        return -EOPNOTSUPP;
-    }
     for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
+        exp->nr_export_bitmaps++;
+    }
+    exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps);
+    for (i = 0, bitmaps = arg->bitmaps; bitmaps;
+         i++, bitmaps = bitmaps->next) {
         const char *bitmap = bitmaps->value;
         BlockDriverState *bs = blk_bs(blk);
         BdrvDirtyBitmap *bm = NULL;
@@ -1580,11 +1610,15 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
             goto fail;
         }
 
-        bdrv_dirty_bitmap_set_busy(bm, true);
-        exp->export_bitmap = bm;
+        exp->export_bitmaps[i] = bm;
         assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
     }
 
+    /* Mark bitmaps busy in a separate loop, to simplify roll-back concerns. */
+    for (i = 0; i < exp->nr_export_bitmaps; i++) {
+        bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true);
+    }
+
     blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
 
     QTAILQ_INSERT_TAIL(&exports, exp, next);
@@ -1592,6 +1626,7 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
     return 0;
 
 fail:
+    g_free(exp->export_bitmaps);
     g_free(exp->name);
     g_free(exp->description);
     return ret;
@@ -1641,6 +1676,7 @@ static void nbd_export_request_shutdown(BlockExport *blk_exp)
 
 static void nbd_export_delete(BlockExport *blk_exp)
 {
+    size_t i;
     NBDExport *exp = container_of(blk_exp, NBDExport, common);
 
     assert(exp->name == NULL);
@@ -1658,8 +1694,8 @@ static void nbd_export_delete(BlockExport *blk_exp)
                                         blk_aio_detach, exp);
     }
 
-    if (exp->export_bitmap) {
-        bdrv_dirty_bitmap_set_busy(exp->export_bitmap, false);
+    for (i = 0; i < exp->nr_export_bitmaps; i++) {
+        bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
     }
 }
 
@@ -2268,6 +2304,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
     int flags;
     NBDExport *exp = client->exp;
     char *msg;
+    size_t i;
 
     switch (request->type) {
     case NBD_CMD_CACHE:
@@ -2358,12 +2395,15 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
                 }
             }
 
-            if (client->export_meta.bitmap) {
+            for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
+                if (!client->export_meta.bitmaps[i]) {
+                    continue;
+                }
                 ret = nbd_co_send_bitmap(client, request->handle,
-                                         client->exp->export_bitmap,
+                                         client->exp->export_bitmaps[i],
                                          request->from, request->len,
                                          dont_fragment, !--contexts_remaining,
-                                         NBD_META_ID_DIRTY_BITMAP, errp);
+                                         NBD_META_ID_DIRTY_BITMAP + i, errp);
                 if (ret < 0) {
                     return ret;
                 }
diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291
index b7320bc7ad..ecef9eec62 100755
--- a/tests/qemu-iotests/291
+++ b/tests/qemu-iotests/291
@@ -115,16 +115,14 @@ echo
 # x-dirty-bitmap is a hack for reading bitmaps; it abuses block status to
 # report "data":false for portions of the bitmap which are set
 IMG="driver=nbd,server.type=unix,server.path=$nbd_unix_socket"
-nbd_server_start_unix_socket -r -f qcow2 -B b0 "$TEST_IMG"
+nbd_server_start_unix_socket -r -f qcow2 \
+    -B b0 -B b1 -B b2 -B b3 "$TEST_IMG"
 $QEMU_IMG map --output=json --image-opts \
     "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b0" | _filter_qemu_img_map
-nbd_server_start_unix_socket -r -f qcow2 -B b1 "$TEST_IMG"
 $QEMU_IMG map --output=json --image-opts \
     "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b1" | _filter_qemu_img_map
-nbd_server_start_unix_socket -r -f qcow2 -B b2 "$TEST_IMG"
 $QEMU_IMG map --output=json --image-opts \
     "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b2" | _filter_qemu_img_map
-nbd_server_start_unix_socket -r -f qcow2 -B b3 "$TEST_IMG"
 $QEMU_IMG map --output=json --image-opts \
     "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b3" | _filter_qemu_img_map
 

From a92b1b065eed385d9077be735eb8e92f5a6e150a Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 27 Oct 2020 00:05:53 -0500
Subject: [PATCH 10/12] block: Return depth level during
 bdrv_is_allocated_above

When checking for allocation across a chain, it's already easy to
count the depth within the chain at which the allocation is found.
Instead of throwing that information away, return it to the caller.
Existing callers only care about allocated/non-allocated, but the
depth will be used by NBD in the next patch.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20201027050556.269064-9-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
[eblake: rebase to master]
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 block/commit.c     |  2 +-
 block/coroutines.h |  6 ++++--
 block/io.c         | 31 +++++++++++++++++++++++--------
 block/mirror.c     |  2 +-
 block/stream.c     |  2 +-
 5 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index 1e85c306cc..71db7ba747 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -156,7 +156,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
         /* Copy if allocated above the base */
         ret = bdrv_is_allocated_above(blk_bs(s->top), s->base_overlay, true,
                                       offset, COMMIT_BUFFER_SIZE, &n);
-        copy = (ret == 1);
+        copy = (ret > 0);
         trace_commit_one_iteration(s, offset, n, ret);
         if (copy) {
             assert(n < SIZE_MAX);
diff --git a/block/coroutines.h b/block/coroutines.h
index 1cb3128b94..4cfb4946e6 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -47,7 +47,8 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
                                   int64_t bytes,
                                   int64_t *pnum,
                                   int64_t *map,
-                                  BlockDriverState **file);
+                                  BlockDriverState **file,
+                                  int *depth);
 int generated_co_wrapper
 bdrv_common_block_status_above(BlockDriverState *bs,
                                BlockDriverState *base,
@@ -57,7 +58,8 @@ bdrv_common_block_status_above(BlockDriverState *bs,
                                int64_t bytes,
                                int64_t *pnum,
                                int64_t *map,
-                               BlockDriverState **file);
+                               BlockDriverState **file,
+                               int *depth);
 
 int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
                                        QEMUIOVector *qiov, int64_t pos);
diff --git a/block/io.c b/block/io.c
index 9918f2499c..ec5e152bb7 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2362,20 +2362,28 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
                                   int64_t bytes,
                                   int64_t *pnum,
                                   int64_t *map,
-                                  BlockDriverState **file)
+                                  BlockDriverState **file,
+                                  int *depth)
 {
     int ret;
     BlockDriverState *p;
     int64_t eof = 0;
+    int dummy;
 
     assert(!include_base || base); /* Can't include NULL base */
 
+    if (!depth) {
+        depth = &dummy;
+    }
+    *depth = 0;
+
     if (!include_base && bs == base) {
         *pnum = bytes;
         return 0;
     }
 
     ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
+    ++*depth;
     if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
         return ret;
     }
@@ -2392,6 +2400,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
     {
         ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
                                    file);
+        ++*depth;
         if (ret < 0) {
             return ret;
         }
@@ -2450,7 +2459,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
                             int64_t *map, BlockDriverState **file)
 {
     return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
-                                          pnum, map, file);
+                                          pnum, map, file, NULL);
 }
 
 int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
@@ -2478,7 +2487,7 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
     }
 
     ret = bdrv_common_block_status_above(bs, NULL, false, false, offset,
-                                         bytes, &pnum, NULL, NULL);
+                                         bytes, &pnum, NULL, NULL, NULL);
 
     if (ret < 0) {
         return ret;
@@ -2495,7 +2504,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
 
     ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
                                          bytes, pnum ? pnum : &dummy, NULL,
-                                         NULL);
+                                         NULL, NULL);
     if (ret < 0) {
         return ret;
     }
@@ -2505,8 +2514,9 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
 /*
  * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
  *
- * Return 1 if (a prefix of) the given range is allocated in any image
- * between BASE and TOP (BASE is only included if include_base is set).
+ * Return a positive depth if (a prefix of) the given range is allocated
+ * in any image between BASE and TOP (BASE is only included if include_base
+ * is set).  Depth 1 is TOP, 2 is the first backing layer, and so forth.
  * BASE can be NULL to check if the given offset is allocated in any
  * image of the chain.  Return 0 otherwise, or negative errno on
  * failure.
@@ -2523,13 +2533,18 @@ int bdrv_is_allocated_above(BlockDriverState *top,
                             bool include_base, int64_t offset,
                             int64_t bytes, int64_t *pnum)
 {
+    int depth;
     int ret = bdrv_common_block_status_above(top, base, include_base, false,
-                                             offset, bytes, pnum, NULL, NULL);
+                                             offset, bytes, pnum, NULL, NULL,
+                                             &depth);
     if (ret < 0) {
         return ret;
     }
 
-    return !!(ret & BDRV_BLOCK_ALLOCATED);
+    if (ret & BDRV_BLOCK_ALLOCATED) {
+        return depth;
+    }
+    return 0;
 }
 
 int coroutine_fn
diff --git a/block/mirror.c b/block/mirror.c
index 26acf4af6f..8e1ad6eceb 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -846,7 +846,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
         }
 
         assert(count);
-        if (ret == 1) {
+        if (ret > 0) {
             bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count);
         }
         offset += count;
diff --git a/block/stream.c b/block/stream.c
index 8ce6729a33..236384f2f7 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -167,7 +167,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
                 n = len - offset;
             }
 
-            copy = (ret == 1);
+            copy = (ret > 0);
         }
         trace_stream_one_iteration(s, offset, n, ret);
         if (copy) {

From 71719cd57fc02ddfd91a4a3ca3f469bfb4d221bc Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 27 Oct 2020 00:05:54 -0500
Subject: [PATCH 11/12] nbd: Add new qemu:allocation-depth metadata context

'qemu-img map' provides a way to determine which extents of an image
come from the top layer vs. inherited from a backing chain.  This is
useful information worth exposing over NBD.  There is a proposal to
add a QMP command block-dirty-bitmap-populate which can create a dirty
bitmap that reflects allocation information, at which point the
qemu:dirty-bitmap:NAME metadata context can expose that information
via the creation of a temporary bitmap, but we can shorten the effort
by adding a new qemu:allocation-depth metadata context that does the
same thing without an intermediate bitmap (this patch does not
eliminate the need for that proposal, as it will have other uses as
well).

While documenting things, remember that although the NBD protocol has
NBD_OPT_SET_META_CONTEXT, the rest of its documentation refers to
'metadata context', which is a more apt description of what is
actually being used by NBD_CMD_BLOCK_STATUS: the user is requesting
metadata by passing one or more context names.  So I also touched up
some existing wording to prefer the term 'metadata context' where it
makes sense.

Note that this patch does not actually enable any way to request a
server to enable this context; that will come in the next patch.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20201027050556.269064-10-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 docs/interop/nbd.txt | 23 +++++++++++----
 include/block/nbd.h  |  8 +++--
 nbd/server.c         | 70 +++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt
index f3b3cacc96..10ce098a29 100644
--- a/docs/interop/nbd.txt
+++ b/docs/interop/nbd.txt
@@ -17,19 +17,31 @@ namespace "qemu".
 
 == "qemu" namespace ==
 
-The "qemu" namespace currently contains only one type of context,
-related to exposing the contents of a dirty bitmap alongside the
-associated disk contents.  That context has the following form:
+The "qemu" namespace currently contains two available metadata context
+types.  The first is related to exposing the contents of a dirty
+bitmap alongside the associated disk contents.  That metadata context
+is named with the following form:
 
     qemu:dirty-bitmap:<dirty-bitmap-export-name>
 
 Each dirty-bitmap metadata context defines only one flag for extents
 in reply for NBD_CMD_BLOCK_STATUS:
 
-    bit 0: NBD_STATE_DIRTY, means that the extent is "dirty"
+    bit 0: NBD_STATE_DIRTY, set when the extent is "dirty"
+
+The second is related to exposing the source of various extents within
+the image, with a single metadata context named:
+
+    qemu:allocation-depth
+
+In the allocation depth context, the entire 32-bit value represents a
+depth of which layer in a thin-provisioned backing chain provided the
+data (0 for unallocated, 1 for the active layer, 2 for the first
+backing layer, and so forth).
 
 For NBD_OPT_LIST_META_CONTEXT the following queries are supported
-in addition to "qemu:dirty-bitmap:<dirty-bitmap-export-name>":
+in addition to the specific "qemu:allocation-depth" and
+"qemu:dirty-bitmap:<dirty-bitmap-export-name>":
 
 * "qemu:" - returns list of all available metadata contexts in the
             namespace.
@@ -55,3 +67,4 @@ the operation of that feature.
 NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE
 * 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports,
 NBD_CMD_FLAG_FAST_ZERO
+* 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth"
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 3dd9a04546..4a52a43ef5 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2016-2019 Red Hat, Inc.
+ *  Copyright (C) 2016-2020 Red Hat, Inc.
  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
  *
  *  Network Block Device
@@ -47,7 +47,7 @@ typedef struct NBDOptionReply NBDOptionReply;
 typedef struct NBDOptionReplyMetaContext {
     NBDOptionReply h; /* h.type = NBD_REP_META_CONTEXT, h.length > 4 */
     uint32_t context_id;
-    /* meta context name follows */
+    /* metadata context name follows */
 } QEMU_PACKED NBDOptionReplyMetaContext;
 
 /* Transmission phase structs
@@ -229,7 +229,7 @@ enum {
 #define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)
 
 /*
- * Maximum size of a protocol string (export name, meta context name,
+ * Maximum size of a protocol string (export name, metadata context name,
  * etc.).  Use malloc rather than stack allocation for storage of a
  * string.
  */
@@ -259,6 +259,8 @@ enum {
 /* Extent flags for qemu:dirty-bitmap in NBD_REPLY_TYPE_BLOCK_STATUS */
 #define NBD_STATE_DIRTY (1 << 0)
 
+/* No flags needed for qemu:allocation-depth in NBD_REPLY_TYPE_BLOCK_STATUS */
+
 static inline bool nbd_reply_type_is_error(int type)
 {
     return type & (1 << 15);
diff --git a/nbd/server.c b/nbd/server.c
index b6841e4554..ebbefcb6d3 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -27,8 +27,9 @@
 #include "qemu/units.h"
 
 #define NBD_META_ID_BASE_ALLOCATION 0
+#define NBD_META_ID_ALLOCATION_DEPTH 1
 /* Dirty bitmaps use 'NBD_META_ID_DIRTY_BITMAP + i', so keep this id last. */
-#define NBD_META_ID_DIRTY_BITMAP 1
+#define NBD_META_ID_DIRTY_BITMAP 2
 
 /*
  * NBD_MAX_BLOCK_STATUS_EXTENTS: 1 MiB of extents data. An empirical
@@ -95,6 +96,7 @@ struct NBDExport {
     BlockBackend *eject_notifier_blk;
     Notifier eject_notifier;
 
+    bool allocation_depth;
     BdrvDirtyBitmap **export_bitmaps;
     size_t nr_export_bitmaps;
 };
@@ -108,6 +110,7 @@ typedef struct NBDExportMetaContexts {
     NBDExport *exp;
     size_t count; /* number of negotiated contexts */
     bool base_allocation; /* export base:allocation context (block status) */
+    bool allocation_depth; /* export qemu:allocation-depth */
     bool *bitmaps; /*
                     * export qemu:dirty-bitmap:<export bitmap name>,
                     * sized by exp->nr_export_bitmaps
@@ -857,7 +860,8 @@ static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
 /* nbd_meta_qemu_query
  *
  * Handle queries to 'qemu' namespace. For now, only the qemu:dirty-bitmap:
- * context is available.  Return true if @query has been handled.
+ * and qemu:allocation-depth contexts are available.  Return true if @query
+ * has been handled.
  */
 static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
                                 const char *query)
@@ -871,12 +875,19 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
 
     if (!*query) {
         if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+            meta->allocation_depth = meta->exp->allocation_depth;
             memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
         }
         trace_nbd_negotiate_meta_query_parse("empty");
         return true;
     }
 
+    if (strcmp(query, "allocation-depth") == 0) {
+        trace_nbd_negotiate_meta_query_parse("allocation-depth");
+        meta->allocation_depth = meta->exp->allocation_depth;
+        return true;
+    }
+
     if (nbd_strshift(&query, "dirty-bitmap:")) {
         trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
         if (!*query) {
@@ -901,7 +912,7 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
         return true;
     }
 
-    trace_nbd_negotiate_meta_query_skip("not dirty-bitmap");
+    trace_nbd_negotiate_meta_query_skip("unknown qemu context");
     return true;
 }
 
@@ -1008,6 +1019,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
     if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
         /* enable all known contexts */
         meta->base_allocation = true;
+        meta->allocation_depth = meta->exp->allocation_depth;
         memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
     } else {
         for (i = 0; i < nb_queries; ++i) {
@@ -1028,6 +1040,16 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
         count++;
     }
 
+    if (meta->allocation_depth) {
+        ret = nbd_negotiate_send_meta_context(client, "qemu:allocation-depth",
+                                              NBD_META_ID_ALLOCATION_DEPTH,
+                                              errp);
+        if (ret < 0) {
+            return ret;
+        }
+        count++;
+    }
+
     for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
         const char *bm_name;
         g_autofree char *context = NULL;
@@ -2005,6 +2027,29 @@ static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
     return 0;
 }
 
+static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
+                                 uint64_t bytes, NBDExtentArray *ea)
+{
+    while (bytes) {
+        int64_t num;
+        int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
+                                          &num);
+
+        if (ret < 0) {
+            return ret;
+        }
+
+        if (nbd_extent_array_add(ea, num, ret) < 0) {
+            return 0;
+        }
+
+        offset += num;
+        bytes -= num;
+    }
+
+    return 0;
+}
+
 /*
  * nbd_co_send_extents
  *
@@ -2044,7 +2089,11 @@ static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
     unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
     g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
 
-    ret = blockstatus_to_extents(bs, offset, length, ea);
+    if (context_id == NBD_META_ID_BASE_ALLOCATION) {
+        ret = blockstatus_to_extents(bs, offset, length, ea);
+    } else {
+        ret = blockalloc_to_extents(bs, offset, length, ea);
+    }
     if (ret < 0) {
         return nbd_co_send_structured_error(
                 client, handle, -ret, "can't get block status", errp);
@@ -2395,6 +2444,19 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
                 }
             }
 
+            if (client->export_meta.allocation_depth) {
+                ret = nbd_co_send_block_status(client, request->handle,
+                                               blk_bs(exp->common.blk),
+                                               request->from, request->len,
+                                               dont_fragment,
+                                               !--contexts_remaining,
+                                               NBD_META_ID_ALLOCATION_DEPTH,
+                                               errp);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+
             for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
                 if (!client->export_meta.bitmaps[i]) {
                     continue;

From dbc7b01492371e4a54b92d2b6d968f9b863cc794 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 27 Oct 2020 00:05:55 -0500
Subject: [PATCH 12/12] nbd: Add 'qemu-nbd -A' to expose allocation depth

Allow the server to expose an additional metadata context to be
requested by savvy clients.  qemu-nbd adds a new option -A to expose
the qemu:allocation-depth metadata context through
NBD_CMD_BLOCK_STATUS; this can also be set via QMP when using
block-export-add.

qemu as client is hacked into viewing the key aspects of this new
context by abusing the already-experimental x-dirty-bitmap option to
collapse all depths greater than 2, which results in a tri-state value
visible in the output of 'qemu-img map --output=json' (yes, that means
x-dirty-bitmap is now a bit of a misnomer, but I didn't feel like
renaming it as it would introduce a needless break of back-compat,
even though we make no compat guarantees with x- members):

unallocated (depth 0) => "zero":false, "data":true
local (depth 1)       => "zero":false, "data":false
backing (depth 2+)    => "zero":true,  "data":true

libnbd as client is probably a nicer way to get at the information
without having to decipher such hacks in qemu as client. ;)

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20201027050556.269064-11-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/nbd.c                | 26 ++++++++++---
 docs/tools/qemu-nbd.rst    |  8 +++-
 nbd/server.c               |  2 +
 qapi/block-core.json       |  7 +++-
 qapi/block-export.json     |  7 +++-
 qemu-nbd.c                 | 12 +++++-
 tests/qemu-iotests/309     | 77 ++++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/309.out | 22 +++++++++++
 tests/qemu-iotests/group   |  1 +
 9 files changed, 151 insertions(+), 11 deletions(-)
 create mode 100755 tests/qemu-iotests/309
 create mode 100644 tests/qemu-iotests/309.out

diff --git a/block/nbd.c b/block/nbd.c
index 4548046cd7..42536702b6 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -135,6 +135,7 @@ typedef struct BDRVNBDState {
     QCryptoTLSCreds *tlscreds;
     const char *hostname;
     char *x_dirty_bitmap;
+    bool alloc_depth;
 
     bool wait_connect;
     NBDConnectThread *connect_thread;
@@ -961,6 +962,16 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
         trace_nbd_parse_blockstatus_compliance("extent length too large");
     }
 
+    /*
+     * HACK: if we are using x-dirty-bitmap to access
+     * qemu:allocation-depth, treat all depths > 2 the same as 2,
+     * since nbd_client_co_block_status is only expecting the low two
+     * bits to be set.
+     */
+    if (s->alloc_depth && extent->flags > 2) {
+        extent->flags = 2;
+    }
+
     return 0;
 }
 
@@ -1795,11 +1806,16 @@ static int nbd_client_handshake(BlockDriverState *bs, QIOChannelSocket *sioc,
         s->sioc = NULL;
         return ret;
     }
-    if (s->x_dirty_bitmap && !s->info.base_allocation) {
-        error_setg(errp, "requested x-dirty-bitmap %s not found",
-                   s->x_dirty_bitmap);
-        ret = -EINVAL;
-        goto fail;
+    if (s->x_dirty_bitmap) {
+        if (!s->info.base_allocation) {
+            error_setg(errp, "requested x-dirty-bitmap %s not found",
+                       s->x_dirty_bitmap);
+            ret = -EINVAL;
+            goto fail;
+        }
+        if (strcmp(s->x_dirty_bitmap, "qemu:allocation-depth") == 0) {
+            s->alloc_depth = true;
+        }
     }
     if (s->info.flags & NBD_FLAG_READ_ONLY) {
         ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp);
diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst
index 667861cb22..fe41336dc5 100644
--- a/docs/tools/qemu-nbd.rst
+++ b/docs/tools/qemu-nbd.rst
@@ -72,10 +72,16 @@ driver options if ``--image-opts`` is specified.
 
   Export the disk as read-only.
 
+.. option:: -A, --allocation-depth
+
+  Expose allocation depth information via the
+  ``qemu:allocation-depth`` metadata context accessible through
+  NBD_OPT_SET_META_CONTEXT.
+
 .. option:: -B, --bitmap=NAME
 
   If *filename* has a qcow2 persistent bitmap *NAME*, expose
-  that bitmap via the ``qemu:dirty-bitmap:NAME`` context
+  that bitmap via the ``qemu:dirty-bitmap:NAME`` metadata context
   accessible through NBD_OPT_SET_META_CONTEXT.
 
 .. option:: -s, --snapshot
diff --git a/nbd/server.c b/nbd/server.c
index ebbefcb6d3..d145e1a690 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1641,6 +1641,8 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
         bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true);
     }
 
+    exp->allocation_depth = arg->allocation_depth;
+
     blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
 
     QTAILQ_INSERT_TAIL(&exports, exp, next);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e00fc27b5e..1b8b4156b4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3905,9 +3905,12 @@
 #
 # @tls-creds: TLS credentials ID
 #
-# @x-dirty-bitmap: A "qemu:dirty-bitmap:NAME" string to query in place of
+# @x-dirty-bitmap: A metadata context name such as "qemu:dirty-bitmap:NAME"
+#                  or "qemu:allocation-depth" to query in place of the
 #                  traditional "base:allocation" block status (see
-#                  NBD_OPT_LIST_META_CONTEXT in the NBD protocol) (since 3.0)
+#                  NBD_OPT_LIST_META_CONTEXT in the NBD protocol; and
+#                  yes, naming this option x-context would have made
+#                  more sense) (since 3.0)
 #
 # @reconnect-delay: On an unexpected disconnect, the nbd client tries to
 #                   connect again until succeeding or encountering a serious
diff --git a/qapi/block-export.json b/qapi/block-export.json
index c4125f4d21..a9f488f99c 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -90,11 +90,16 @@
 #           the metadata context name "qemu:dirty-bitmap:BITMAP" to inspect
 #           each bitmap.
 #
+# @allocation-depth: Also export the allocation depth map for @device, so
+#                    the NBD client can use NBD_OPT_SET_META_CONTEXT with
+#                    the metadata context name "qemu:allocation-depth" to
+#                    inspect allocation details. (since 5.2)
+#
 # Since: 5.2
 ##
 { 'struct': 'BlockExportOptionsNbd',
   'base': 'BlockExportOptionsNbdBase',
-  'data': { '*bitmaps': ['str'] } }
+  'data': { '*bitmaps': ['str'], '*allocation-depth': 'bool' } }
 
 ##
 # @BlockExportOptionsVhostUserBlk:
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 74e73a7465..75ced65030 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -100,6 +100,7 @@ static void usage(const char *name)
 "\n"
 "Exposing part of the image:\n"
 "  -o, --offset=OFFSET       offset into the image\n"
+"  -A, --allocation-depth    expose the allocation depth\n"
 "  -B, --bitmap=NAME         expose a persistent dirty bitmap\n"
 "\n"
 "General purpose options:\n"
@@ -524,7 +525,7 @@ int main(int argc, char **argv)
     char *device = NULL;
     QemuOpts *sn_opts = NULL;
     const char *sn_id_or_name = NULL;
-    const char *sopt = "hVb:o:p:rsnc:dvk:e:f:tl:x:T:D:B:L";
+    const char *sopt = "hVb:o:p:rsnc:dvk:e:f:tl:x:T:D:AB:L";
     struct option lopt[] = {
         { "help", no_argument, NULL, 'h' },
         { "version", no_argument, NULL, 'V' },
@@ -533,6 +534,7 @@ int main(int argc, char **argv)
         { "socket", required_argument, NULL, 'k' },
         { "offset", required_argument, NULL, 'o' },
         { "read-only", no_argument, NULL, 'r' },
+        { "allocation-depth", no_argument, NULL, 'A' },
         { "bitmap", required_argument, NULL, 'B' },
         { "connect", required_argument, NULL, 'c' },
         { "disconnect", no_argument, NULL, 'd' },
@@ -575,6 +577,7 @@ int main(int argc, char **argv)
     const char *export_name = NULL; /* defaults to "" later for server mode */
     const char *export_description = NULL;
     strList *bitmaps = NULL;
+    bool alloc_depth = false;
     const char *tlscredsid = NULL;
     bool imageOpts = false;
     bool writethrough = true;
@@ -689,6 +692,9 @@ int main(int argc, char **argv)
             readonly = true;
             flags &= ~BDRV_O_RDWR;
             break;
+        case 'A':
+            alloc_depth = true;
+            break;
         case 'B':
             QAPI_LIST_PREPEND(bitmaps, g_strdup(optarg));
             break;
@@ -787,7 +793,7 @@ int main(int argc, char **argv)
         }
         if (export_name || export_description || dev_offset ||
             device || disconnect || fmt || sn_id_or_name || bitmaps ||
-            seen_aio || seen_discard || seen_cache) {
+            alloc_depth || seen_aio || seen_discard || seen_cache) {
             error_report("List mode is incompatible with per-device settings");
             exit(EXIT_FAILURE);
         }
@@ -1073,6 +1079,8 @@ int main(int argc, char **argv)
             .description          = g_strdup(export_description),
             .has_bitmaps          = !!bitmaps,
             .bitmaps              = bitmaps,
+            .has_allocation_depth = alloc_depth,
+            .allocation_depth     = alloc_depth,
         },
     };
     blk_exp_add(export_opts, &error_fatal);
diff --git a/tests/qemu-iotests/309 b/tests/qemu-iotests/309
new file mode 100755
index 0000000000..fb61157c2e
--- /dev/null
+++ b/tests/qemu-iotests/309
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+#
+# Test qemu-nbd -A
+#
+# Copyright (C) 2018-2020 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+status=1 # failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+    nbd_server_stop
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.nbd
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+_require_command QEMU_NBD
+
+echo
+echo "=== Initial image setup ==="
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img 4M
+$QEMU_IO -c 'w 0 2M' -f $IMGFMT "$TEST_IMG.base" | _filter_qemu_io
+_make_test_img -b "$TEST_IMG.base" -F $IMGFMT 4M
+$QEMU_IO -c 'w 1M 2M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Check allocation over NBD ==="
+echo
+
+$QEMU_IMG map --output=json -f qcow2 "$TEST_IMG"
+IMG="driver=nbd,server.type=unix,server.path=$nbd_unix_socket"
+nbd_server_start_unix_socket -r -f qcow2 -A "$TEST_IMG"
+# Normal -f raw NBD block status loses access to allocation information
+$QEMU_IMG map --output=json --image-opts \
+    "$IMG" | _filter_qemu_img_map
+# But when we use -A, coupled with x-dirty-bitmap in the client for feeding
+# 2-bit block status from an alternative NBD metadata context (note that
+# the client code for x-dirty-bitmap intentionally collapses all depths
+# beyond 2 into a single value), we can determine:
+#    unallocated (depth 0) => "zero":false, "data":true
+#    local (depth 1)       => "zero":false, "data":false
+#    backing (depth 2+)    => "zero":true,  "data":true
+$QEMU_IMG map --output=json --image-opts \
+    "$IMG,x-dirty-bitmap=qemu:allocation-depth" | _filter_qemu_img_map
+# More accurate results can be obtained by other NBD clients such as
+# libnbd, but this test works without such external dependencies.
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/309.out b/tests/qemu-iotests/309.out
new file mode 100644
index 0000000000..db75bb6b0d
--- /dev/null
+++ b/tests/qemu-iotests/309.out
@@ -0,0 +1,22 @@
+QA output created by 309
+
+=== Initial image setup ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=4194304
+wrote 2097152/2097152 bytes at offset 0
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
+wrote 2097152/2097152 bytes at offset 1048576
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Check allocation over NBD ===
+
+[{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, "offset": 327680},
+{ "start": 1048576, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": 327680},
+{ "start": 3145728, "length": 1048576, "depth": 1, "zero": true, "data": false}]
+[{ "start": 0, "length": 3145728, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 3145728, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
+[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": true, "offset": OFFSET},
+{ "start": 1048576, "length": 2097152, "depth": 0, "zero": false, "data": false},
+{ "start": 3145728, "length": 1048576, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 3432989283..2960dff728 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -315,3 +315,4 @@
 304 rw quick
 305 rw quick
 307 rw quick export
+309 rw auto quick