mirror of https://github.com/xemu-project/xemu.git
block/export: rewrite vduse-blk drain code
vduse_blk_detach_ctx() waits for in-flight requests using AIO_WAIT_WHILE(). This is not allowed according to a comment in bdrv_set_aio_context_commit(): /* * Take the old AioContex when detaching it from bs. * At this point, new_context lock is already acquired, and we are now * also taking old_context. This is safe as long as bdrv_detach_aio_context * does not call AIO_POLL_WHILE(). */ Use this opportunity to rewrite the drain code in vduse-blk: - Use the BlockExport refcount so that vduse_blk_exp_delete() is only called when there are no more requests in flight. - Implement .drained_poll() so in-flight request coroutines are stopped by the time .bdrv_detach_aio_context() is called. - Remove AIO_WAIT_WHILE() from vduse_blk_detach_ctx() to solve the .bdrv_detach_aio_context() constraint violation. It's no longer needed due to the previous changes. - Always handle the VDUSE file descriptor, even in drained sections. The VDUSE file descriptor doesn't submit I/O, so it's safe to handle it in drained sections. This ensures that the VDUSE kernel code gets a fast response. - Suspend virtqueue fd handlers in .drained_begin() and resume them in .drained_end(). This eliminates the need for the aio_set_fd_handler(is_external=true) flag, which is being removed from QEMU. This is a long list but splitting it into individual commits would probably lead to git bisect failures - the changes are all related. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Message-Id: <20230516190238.8401-14-stefanha@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
parent
fb5cba2c7e
commit
195332c1d9
|
@ -31,7 +31,8 @@ typedef struct VduseBlkExport {
|
||||||
VduseDev *dev;
|
VduseDev *dev;
|
||||||
uint16_t num_queues;
|
uint16_t num_queues;
|
||||||
char *recon_file;
|
char *recon_file;
|
||||||
unsigned int inflight;
|
unsigned int inflight; /* atomic */
|
||||||
|
bool vqs_started;
|
||||||
} VduseBlkExport;
|
} VduseBlkExport;
|
||||||
|
|
||||||
typedef struct VduseBlkReq {
|
typedef struct VduseBlkReq {
|
||||||
|
@ -41,13 +42,24 @@ typedef struct VduseBlkReq {
|
||||||
|
|
||||||
static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
|
static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
|
||||||
{
|
{
|
||||||
vblk_exp->inflight++;
|
if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) {
|
||||||
|
/* Prevent export from being deleted */
|
||||||
|
aio_context_acquire(vblk_exp->export.ctx);
|
||||||
|
blk_exp_ref(&vblk_exp->export);
|
||||||
|
aio_context_release(vblk_exp->export.ctx);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
|
static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
|
||||||
{
|
{
|
||||||
if (--vblk_exp->inflight == 0) {
|
if (qatomic_fetch_dec(&vblk_exp->inflight) == 1) {
|
||||||
|
/* Wake AIO_WAIT_WHILE() */
|
||||||
aio_wait_kick();
|
aio_wait_kick();
|
||||||
|
|
||||||
|
/* Now the export can be deleted */
|
||||||
|
aio_context_acquire(vblk_exp->export.ctx);
|
||||||
|
blk_exp_unref(&vblk_exp->export);
|
||||||
|
aio_context_release(vblk_exp->export.ctx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,8 +136,12 @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
|
||||||
{
|
{
|
||||||
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
|
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
|
||||||
|
|
||||||
|
if (!vblk_exp->vqs_started) {
|
||||||
|
return; /* vduse_blk_drained_end() will start vqs later */
|
||||||
|
}
|
||||||
|
|
||||||
aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
|
aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
|
||||||
true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
|
false, on_vduse_vq_kick, NULL, NULL, NULL, vq);
|
||||||
/* Make sure we don't miss any kick afer reconnecting */
|
/* Make sure we don't miss any kick afer reconnecting */
|
||||||
eventfd_write(vduse_queue_get_fd(vq), 1);
|
eventfd_write(vduse_queue_get_fd(vq), 1);
|
||||||
}
|
}
|
||||||
|
@ -133,9 +149,14 @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
|
||||||
static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
|
static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
|
||||||
{
|
{
|
||||||
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
|
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
|
||||||
|
int fd = vduse_queue_get_fd(vq);
|
||||||
|
|
||||||
aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
|
if (fd < 0) {
|
||||||
true, NULL, NULL, NULL, NULL, NULL);
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
aio_set_fd_handler(vblk_exp->export.ctx, fd, false,
|
||||||
|
NULL, NULL, NULL, NULL, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const VduseOps vduse_blk_ops = {
|
static const VduseOps vduse_blk_ops = {
|
||||||
|
@ -152,42 +173,19 @@ static void on_vduse_dev_kick(void *opaque)
|
||||||
|
|
||||||
static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
|
static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
|
|
||||||
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
|
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
|
||||||
true, on_vduse_dev_kick, NULL, NULL, NULL,
|
false, on_vduse_dev_kick, NULL, NULL, NULL,
|
||||||
vblk_exp->dev);
|
vblk_exp->dev);
|
||||||
|
|
||||||
for (i = 0; i < vblk_exp->num_queues; i++) {
|
/* Virtqueues are handled by vduse_blk_drained_end() */
|
||||||
VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
|
|
||||||
int fd = vduse_queue_get_fd(vq);
|
|
||||||
|
|
||||||
if (fd < 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
aio_set_fd_handler(vblk_exp->export.ctx, fd, true,
|
|
||||||
on_vduse_vq_kick, NULL, NULL, NULL, vq);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
|
static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < vblk_exp->num_queues; i++) {
|
|
||||||
VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
|
|
||||||
int fd = vduse_queue_get_fd(vq);
|
|
||||||
|
|
||||||
if (fd < 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
aio_set_fd_handler(vblk_exp->export.ctx, fd,
|
|
||||||
true, NULL, NULL, NULL, NULL, NULL);
|
|
||||||
}
|
|
||||||
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
|
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
|
||||||
true, NULL, NULL, NULL, NULL, NULL);
|
false, NULL, NULL, NULL, NULL, NULL);
|
||||||
|
|
||||||
AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
|
/* Virtqueues are handled by vduse_blk_drained_begin() */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -220,8 +218,55 @@ static void vduse_blk_resize(void *opaque)
|
||||||
(char *)&config.capacity);
|
(char *)&config.capacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void vduse_blk_stop_virtqueues(VduseBlkExport *vblk_exp)
|
||||||
|
{
|
||||||
|
for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
|
||||||
|
VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
|
||||||
|
vduse_blk_disable_queue(vblk_exp->dev, vq);
|
||||||
|
}
|
||||||
|
|
||||||
|
vblk_exp->vqs_started = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vduse_blk_start_virtqueues(VduseBlkExport *vblk_exp)
|
||||||
|
{
|
||||||
|
vblk_exp->vqs_started = true;
|
||||||
|
|
||||||
|
for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
|
||||||
|
VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
|
||||||
|
vduse_blk_enable_queue(vblk_exp->dev, vq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vduse_blk_drained_begin(void *opaque)
|
||||||
|
{
|
||||||
|
BlockExport *exp = opaque;
|
||||||
|
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
||||||
|
|
||||||
|
vduse_blk_stop_virtqueues(vblk_exp);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vduse_blk_drained_end(void *opaque)
|
||||||
|
{
|
||||||
|
BlockExport *exp = opaque;
|
||||||
|
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
||||||
|
|
||||||
|
vduse_blk_start_virtqueues(vblk_exp);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool vduse_blk_drained_poll(void *opaque)
|
||||||
|
{
|
||||||
|
BlockExport *exp = opaque;
|
||||||
|
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
||||||
|
|
||||||
|
return qatomic_read(&vblk_exp->inflight) > 0;
|
||||||
|
}
|
||||||
|
|
||||||
static const BlockDevOps vduse_block_ops = {
|
static const BlockDevOps vduse_block_ops = {
|
||||||
.resize_cb = vduse_blk_resize,
|
.resize_cb = vduse_blk_resize,
|
||||||
|
.drained_begin = vduse_blk_drained_begin,
|
||||||
|
.drained_end = vduse_blk_drained_end,
|
||||||
|
.drained_poll = vduse_blk_drained_poll,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
|
static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
|
||||||
|
@ -268,6 +313,7 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
|
||||||
vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: "");
|
vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: "");
|
||||||
vblk_exp->handler.logical_block_size = logical_block_size;
|
vblk_exp->handler.logical_block_size = logical_block_size;
|
||||||
vblk_exp->handler.writable = opts->writable;
|
vblk_exp->handler.writable = opts->writable;
|
||||||
|
vblk_exp->vqs_started = true;
|
||||||
|
|
||||||
config.capacity =
|
config.capacity =
|
||||||
cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
|
cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
|
||||||
|
@ -322,14 +368,20 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
|
||||||
vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
|
vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true,
|
aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), false,
|
||||||
on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
|
on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
|
||||||
|
|
||||||
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
|
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
|
||||||
vblk_exp);
|
vblk_exp);
|
||||||
|
|
||||||
blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
|
blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We handle draining ourselves using an in-flight counter and by disabling
|
||||||
|
* virtqueue fd handlers. Do not queue BlockBackend requests, they need to
|
||||||
|
* complete so the in-flight counter reaches zero.
|
||||||
|
*/
|
||||||
|
blk_set_disable_request_queuing(exp->blk, true);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
err:
|
err:
|
||||||
vduse_dev_destroy(vblk_exp->dev);
|
vduse_dev_destroy(vblk_exp->dev);
|
||||||
|
@ -344,6 +396,9 @@ static void vduse_blk_exp_delete(BlockExport *exp)
|
||||||
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
assert(qatomic_read(&vblk_exp->inflight) == 0);
|
||||||
|
|
||||||
|
vduse_blk_detach_ctx(vblk_exp);
|
||||||
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
|
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
|
||||||
vblk_exp);
|
vblk_exp);
|
||||||
ret = vduse_dev_destroy(vblk_exp->dev);
|
ret = vduse_dev_destroy(vblk_exp->dev);
|
||||||
|
@ -354,13 +409,12 @@ static void vduse_blk_exp_delete(BlockExport *exp)
|
||||||
g_free(vblk_exp->handler.serial);
|
g_free(vblk_exp->handler.serial);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Called with exp->ctx acquired */
|
||||||
static void vduse_blk_exp_request_shutdown(BlockExport *exp)
|
static void vduse_blk_exp_request_shutdown(BlockExport *exp)
|
||||||
{
|
{
|
||||||
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
||||||
|
|
||||||
aio_context_acquire(vblk_exp->export.ctx);
|
vduse_blk_stop_virtqueues(vblk_exp);
|
||||||
vduse_blk_detach_ctx(vblk_exp);
|
|
||||||
aio_context_acquire(vblk_exp->export.ctx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const BlockExportDriver blk_exp_vduse_blk = {
|
const BlockExportDriver blk_exp_vduse_blk = {
|
||||||
|
|
Loading…
Reference in New Issue