Block layer patches

- rbd: fix handling of holes in .bdrv_co_block_status
 - Fix potential crash in bdrv_set_backing_hd()
 - vhost-user-blk export: Fix shutdown with requests in flight
 - FUSE export: Fix build failure on FreeBSD
 - Documentation improvements
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmH5TlARHGt3b2xmQHJl
 ZGhhdC5jb20ACgkQfwmycsiPL9biGQ/9GLOXFaFVDdrAOSievKc1xGy3tirX21Wn
 xSQgRUFHcjbMu/r/I5hA5imCNWq8KmT5S+aMUO76RAsRDH94QZdTMlq/1bmPBgkY
 Pu4aKhmP0WzPOmqnjhq19rpk44J75lCtAwc+r+VLzGZUali/wOcIkEQPID3RgSlQ
 628dylVwFF57cQzdvUPph7+iaewJ3OUlk3plYUkyLB/1lRuBTZD6E0bcUeN4eo/K
 YvKMpiRMLyFJwX9d50YRhFw8zwM4cXLUynRzdDSZuUoGeaih59p2GJzkbvrXbBer
 edtEjwvf5PAVLXmHwWI+zz/aC4KYIE+sppB2YCOHhcORcAmKbCpP5Ky7W2jJQ6rJ
 UvbVwjHxVUB3JN59MYsVbhH5l7i/HrT13TZ2VR2HAn4kswk8s3DNGVF0I+DnGD1g
 gHBlxtAeORvM/+7E6hxX4cFY8ZNsji5DGBpbEtfXtGizP0LkF1YJhH7lB2ZSml50
 PJqqxTCTS8MevxWHuSdp+gV7stQoQHIuaNu9jKXrzqQWh+ezuJp1AhcRRWguxoOp
 n+SZpDybQBCXN0EfWlVECmdri8WJsmdBSD/K5qJ0ehN2bF4d6No0c5aCKJAKzfgp
 ygQ+rKPzGplp6cP16Pluu/tCiu1HDar8NajxErX8qqopBVnZmMZNtqi0GjktmzdB
 OhYOyI3m0G0=
 =Eyza
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kwolf-gitlab/tags/for-upstream' into staging

Block layer patches

- rbd: fix handling of holes in .bdrv_co_block_status
- Fix potential crash in bdrv_set_backing_hd()
- vhost-user-blk export: Fix shutdown with requests in flight
- FUSE export: Fix build failure on FreeBSD
- Documentation improvements

# gpg: Signature made Tue 01 Feb 2022 15:14:24 GMT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kwolf-gitlab/tags/for-upstream:
  block/rbd: workaround for ceph issue #53784
  block/rbd: fix handling of holes in .bdrv_co_block_status
  qemu-img: Unify [-b [-F]] documentation
  qsd: Document fuse's allow-other option
  block.h: remove outdated comment
  block/export/fuse: Fix build failure on FreeBSD
  block/export/fuse: Rearrange if-else-if ladder in fuse_fallocate()
  block/export: Fix vhost-user-blk shutdown with requests in flight
  block: bdrv_set_backing_hd(): use drained section
  qemu-storage-daemon: Fix typo in vhost-user-blk help

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2022-02-01 19:48:15 +00:00
commit 47cc1a3655
10 changed files with 118 additions and 34 deletions

View File

@ -3341,6 +3341,8 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
int ret; int ret;
Transaction *tran = tran_new(); Transaction *tran = tran_new();
bdrv_drained_begin(bs);
ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
if (ret < 0) { if (ret < 0) {
goto out; goto out;
@ -3350,6 +3352,8 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
out: out:
tran_finalize(tran, ret); tran_finalize(tran, ret);
bdrv_drained_end(bs);
return ret; return ret;
} }

View File

@ -625,11 +625,33 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
return; return;
} }
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
if (mode & FALLOC_FL_KEEP_SIZE) { if (mode & FALLOC_FL_KEEP_SIZE) {
length = MIN(length, blk_len - offset); length = MIN(length, blk_len - offset);
} }
#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
if (mode & FALLOC_FL_PUNCH_HOLE) { if (!mode) {
/* We can only fallocate at the EOF with a truncate */
if (offset < blk_len) {
fuse_reply_err(req, EOPNOTSUPP);
return;
}
if (offset > blk_len) {
/* No preallocation needed here */
ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
if (ret < 0) {
fuse_reply_err(req, -ret);
return;
}
}
ret = fuse_do_truncate(exp, offset + length, true,
PREALLOC_MODE_FALLOC);
}
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
else if (mode & FALLOC_FL_PUNCH_HOLE) {
if (!(mode & FALLOC_FL_KEEP_SIZE)) { if (!(mode & FALLOC_FL_KEEP_SIZE)) {
fuse_reply_err(req, EINVAL); fuse_reply_err(req, EINVAL);
return; return;
@ -643,6 +665,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
length -= size; length -= size;
} while (ret == 0 && length > 0); } while (ret == 0 && length > 0);
} }
#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
#ifdef CONFIG_FALLOCATE_ZERO_RANGE #ifdef CONFIG_FALLOCATE_ZERO_RANGE
else if (mode & FALLOC_FL_ZERO_RANGE) { else if (mode & FALLOC_FL_ZERO_RANGE) {
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) { if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
@ -665,25 +688,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
} while (ret == 0 && length > 0); } while (ret == 0 && length > 0);
} }
#endif /* CONFIG_FALLOCATE_ZERO_RANGE */ #endif /* CONFIG_FALLOCATE_ZERO_RANGE */
else if (!mode) { else {
/* We can only fallocate at the EOF with a truncate */
if (offset < blk_len) {
fuse_reply_err(req, EOPNOTSUPP);
return;
}
if (offset > blk_len) {
/* No preallocation needed here */
ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
if (ret < 0) {
fuse_reply_err(req, -ret);
return;
}
}
ret = fuse_do_truncate(exp, offset + length, true,
PREALLOC_MODE_FALLOC);
} else {
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
} }

View File

@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
return VIRTIO_BLK_S_IOERR; return VIRTIO_BLK_S_IOERR;
} }
/* Called with server refcount increased, must decrease before returning */
static void coroutine_fn vu_blk_virtio_process_req(void *opaque) static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
{ {
VuBlkReq *req = opaque; VuBlkReq *req = opaque;
@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
} }
vu_blk_req_complete(req); vu_blk_req_complete(req);
vhost_user_server_unref(server);
return; return;
err: err:
free(req); free(req);
vhost_user_server_unref(server);
} }
static void vu_blk_process_vq(VuDev *vu_dev, int idx) static void vu_blk_process_vq(VuDev *vu_dev, int idx)
@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
Coroutine *co = Coroutine *co =
qemu_coroutine_create(vu_blk_virtio_process_req, req); qemu_coroutine_create(vu_blk_virtio_process_req, req);
vhost_user_server_ref(server);
qemu_coroutine_enter(co); qemu_coroutine_enter(co);
} }
} }

View File

@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
RBDDiffIterateReq *req = opaque; RBDDiffIterateReq *req = opaque;
assert(req->offs + req->bytes <= offs); assert(req->offs + req->bytes <= offs);
/*
* we do not diff against a snapshot so we should never receive a callback /* treat a hole like an unallocated area and bail out */
* for a hole. if (!exists) {
*/ return 0;
assert(exists); }
if (!req->exists && offs > req->offs) { if (!req->exists && offs > req->offs) {
/* /*
@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
int status, r; int status, r;
RBDDiffIterateReq req = { .offs = offset }; RBDDiffIterateReq req = { .offs = offset };
uint64_t features, flags; uint64_t features, flags;
uint64_t head = 0;
assert(offset + bytes <= s->image_size); assert(offset + bytes <= s->image_size);
@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
return status; return status;
} }
r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, #if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0)
/*
* librbd had a bug until early 2022 that affected all versions of ceph that
* supported fast-diff. This bug results in reporting of incorrect offsets
* if the offset parameter to rbd_diff_iterate2 is not object aligned.
* Work around this bug by rounding down the offset to object boundaries.
* This is OK because we call rbd_diff_iterate2 with whole_object = true.
* However, this workaround only works for non cloned images with default
* striping.
*
* See: https://tracker.ceph.com/issues/53784
*/
/* check if RBD image has non-default striping enabled */
if (features & RBD_FEATURE_STRIPINGV2) {
return status;
}
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/*
* check if RBD image is a clone (= has a parent).
*
* rbd_get_parent_info is deprecated from Nautilus onwards, but the
* replacement rbd_get_parent is not present in Luminous and Mimic.
*/
if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) {
return status;
}
#pragma GCC diagnostic pop
head = req.offs & (s->object_size - 1);
req.offs -= head;
bytes += head;
#endif
r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true,
qemu_rbd_diff_iterate_cb, &req); qemu_rbd_diff_iterate_cb, &req);
if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
return status; return status;
@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
} }
*pnum = req.bytes; assert(req.bytes > head);
*pnum = req.bytes - head;
return status; return status;
} }

View File

@ -463,7 +463,7 @@ Command description:
``--skip-broken-bitmaps`` is also specified to copy only the ``--skip-broken-bitmaps`` is also specified to copy only the
consistent bitmaps. consistent bitmaps.
.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE] .. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-o OPTIONS] FILENAME [SIZE]
Create the new disk image *FILENAME* of size *SIZE* and format Create the new disk image *FILENAME* of size *SIZE* and format
*FMT*. Depending on the file format, you can add one or more *OPTIONS* *FMT*. Depending on the file format, you can add one or more *OPTIONS*

View File

@ -76,7 +76,7 @@ Standard options:
.. option:: --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>][,writable=on|off][,bitmap=<name>] .. option:: --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>][,writable=on|off][,bitmap=<name>]
--export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=unix,addr.path=<socket-path>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>] --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=unix,addr.path=<socket-path>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>]
--export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=fd,addr.str=<fd>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>] --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=fd,addr.str=<fd>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>]
--export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>[,growable=on|off][,writable=on|off] --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>[,growable=on|off][,writable=on|off][,allow-other=on|off|auto]
is a block export definition. ``node-name`` is the block node that should be is a block export definition. ``node-name`` is the block node that should be
exported. ``writable`` determines whether or not the export allows write exported. ``writable`` determines whether or not the export allows write
@ -103,7 +103,12 @@ Standard options:
mounted). Consequently, applications that have opened the given file before mounted). Consequently, applications that have opened the given file before
the export became active will continue to see its original content. If the export became active will continue to see its original content. If
``growable`` is set, writes after the end of the exported file will grow the ``growable`` is set, writes after the end of the exported file will grow the
block node to fit. block node to fit. The ``allow-other`` option controls whether users other
than the user running the process will be allowed to access the export. Note
that enabling this option as a non-root user requires enabling the
user_allow_other option in the global fuse.conf configuration file. Setting
``allow-other`` to auto (the default) will try enabling this option, and on
error fall back to disabling it.
.. option:: --monitor MONITORDEF .. option:: --monitor MONITORDEF

View File

@ -42,6 +42,8 @@ typedef struct {
const VuDevIface *vu_iface; const VuDevIface *vu_iface;
/* Protected by ctx lock */ /* Protected by ctx lock */
unsigned int refcount;
bool wait_idle;
VuDev vu_dev; VuDev vu_dev;
QIOChannel *ioc; /* The I/O channel with the client */ QIOChannel *ioc; /* The I/O channel with the client */
QIOChannelSocket *sioc; /* The underlying data channel with the client */ QIOChannelSocket *sioc; /* The underlying data channel with the client */
@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server,
void vhost_user_server_stop(VuServer *server); void vhost_user_server_stop(VuServer *server);
void vhost_user_server_ref(VuServer *server);
void vhost_user_server_unref(VuServer *server);
void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx); void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
void vhost_user_server_detach_aio_context(VuServer *server); void vhost_user_server_detach_aio_context(VuServer *server);

View File

@ -52,9 +52,9 @@ SRST
ERST ERST
DEF("create", img_create, DEF("create", img_create,
"create [--object objectdef] [-q] [-f fmt] [-b backing_file] [-F backing_fmt] [-u] [-o options] filename [size]") "create [--object objectdef] [-q] [-f fmt] [-b backing_file [-F backing_fmt]] [-u] [-o options] filename [size]")
SRST SRST
.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE] .. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-o OPTIONS] FILENAME [SIZE]
ERST ERST
DEF("dd", img_dd, DEF("dd", img_dd,

View File

@ -100,7 +100,7 @@ static void help(void)
"\n" "\n"
#ifdef CONFIG_FUSE #ifdef CONFIG_FUSE
" --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>\n" " --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>\n"
" [,growable=on|off][,writable=on|off]\n" " [,growable=on|off][,writable=on|off][,allow-other=on|off|auto]\n"
" export the specified block node over FUSE\n" " export the specified block node over FUSE\n"
"\n" "\n"
#endif /* CONFIG_FUSE */ #endif /* CONFIG_FUSE */
@ -111,7 +111,7 @@ static void help(void)
" export the specified block node as a\n" " export the specified block node as a\n"
" vhost-user-blk device over UNIX domain socket\n" " vhost-user-blk device over UNIX domain socket\n"
" --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,\n" " --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,\n"
" fd,addr.str=<fd>[,writable=on|off]\n" " addr.type=fd,addr.str=<fd>[,writable=on|off]\n"
" [,logical-block-size=<block-size>][,num-queues=<num-queues>]\n" " [,logical-block-size=<block-size>][,num-queues=<num-queues>]\n"
" export the specified block node as a\n" " export the specified block node as a\n"
" vhost-user-blk device over file descriptor\n" " vhost-user-blk device over file descriptor\n"

View File

@ -74,6 +74,20 @@ static void panic_cb(VuDev *vu_dev, const char *buf)
error_report("vu_panic: %s", buf); error_report("vu_panic: %s", buf);
} }
/*
 * Take one reference on @server.
 *
 * Must not be called while server->wait_idle is set; this is enforced with
 * an assertion. Every call is expected to be balanced by a later
 * vhost_user_server_unref().
 */
void vhost_user_server_ref(VuServer *server)
{
    assert(!server->wait_idle);
    server->refcount += 1;
}
/*
 * Drop one reference on @server.
 *
 * When the count reaches zero while server->wait_idle is set, the coroutine
 * stored in server->co_trip is woken up (presumably the client-trip
 * coroutine that yielded waiting for in-flight requests; confirm against
 * the code that sets wait_idle).
 */
void vhost_user_server_unref(VuServer *server)
{
    /*
     * refcount is unsigned: an unbalanced unref would silently wrap around
     * instead of going negative, and the idle wake-up below would then never
     * trigger. Catch the imbalance before decrementing.
     */
    assert(server->refcount > 0);
    server->refcount--;
    if (server->wait_idle && !server->refcount) {
        aio_co_wake(server->co_trip);
    }
}
static bool coroutine_fn static bool coroutine_fn
vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
{ {
@ -177,6 +191,14 @@ static coroutine_fn void vu_client_trip(void *opaque)
/* Keep running */ /* Keep running */
} }
if (server->refcount) {
/* Wait for requests to complete before we can unmap the memory */
server->wait_idle = true;
qemu_coroutine_yield();
server->wait_idle = false;
}
assert(server->refcount == 0);
vu_deinit(vu_dev); vu_deinit(vu_dev);
/* vu_deinit() should have called remove_watch() */ /* vu_deinit() should have called remove_watch() */