From 8187f63c9c64c2f2322fc6730c478a57bc0ce9eb Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 10 May 2023 18:06:19 +0300 Subject: [PATCH 01/21] blockdev: refactor transaction to use Transaction API We are going to add more block-graph modifying transaction actions, and block-graph modifying functions are already based on Transaction API. Next, we'll need to separately update permissions after several graph-modifying actions, and this would be simple with help of Transaction API. So, now let's just transform what we have into new-style transaction actions. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20230510150624.310640-2-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- blockdev.c | 309 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 178 insertions(+), 131 deletions(-) diff --git a/blockdev.c b/blockdev.c index d141ca7a2d..3c72d40f51 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1210,10 +1210,7 @@ typedef struct BlkActionState BlkActionState; */ typedef struct BlkActionOps { size_t instance_size; - void (*prepare)(BlkActionState *common, Error **errp); - void (*commit)(BlkActionState *common); - void (*abort)(BlkActionState *common); - void (*clean)(BlkActionState *common); + void (*action)(BlkActionState *common, Transaction *tran, Error **errp); } BlkActionOps; /** @@ -1245,6 +1242,12 @@ typedef struct InternalSnapshotState { bool created; } InternalSnapshotState; +static void internal_snapshot_abort(void *opaque); +static void internal_snapshot_clean(void *opaque); +TransactionActionDrv internal_snapshot_drv = { + .abort = internal_snapshot_abort, + .clean = internal_snapshot_clean, +}; static int action_check_completion_mode(BlkActionState *s, Error **errp) { @@ -1259,8 +1262,8 @@ static int action_check_completion_mode(BlkActionState *s, Error **errp) return 0; } -static void internal_snapshot_prepare(BlkActionState *common, - Error **errp) +static void internal_snapshot_action(BlkActionState *common, + Transaction *tran, Error **errp) { Error *local_err = NULL; const char *device; @@ -1279,6 +1282,8 @@ static void internal_snapshot_prepare(BlkActionState *common, internal = common->action->u.blockdev_snapshot_internal_sync.data; state = DO_UPCAST(InternalSnapshotState, common, common); + tran_add(tran, &internal_snapshot_drv, state); + /* 1. parse input */ device = internal->device; name = internal->name; @@ -1363,10 +1368,9 @@ out: aio_context_release(aio_context); } -static void internal_snapshot_abort(BlkActionState *common) +static void internal_snapshot_abort(void *opaque) { - InternalSnapshotState *state = - DO_UPCAST(InternalSnapshotState, common, common); + InternalSnapshotState *state = opaque; BlockDriverState *bs = state->bs; QEMUSnapshotInfo *sn = &state->sn; AioContext *aio_context; @@ -1390,10 +1394,9 @@ static void internal_snapshot_abort(BlkActionState *common) aio_context_release(aio_context); } -static void internal_snapshot_clean(BlkActionState *common) +static void internal_snapshot_clean(void *opaque) { - InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState, - common, common); + g_autofree InternalSnapshotState *state = opaque; AioContext *aio_context; if (!state->bs) { @@ -1416,8 +1419,17 @@ typedef struct ExternalSnapshotState { bool overlay_appended; } ExternalSnapshotState; -static void external_snapshot_prepare(BlkActionState *common, - Error **errp) +static void external_snapshot_commit(void *opaque); +static void external_snapshot_abort(void *opaque); +static void external_snapshot_clean(void *opaque); +TransactionActionDrv external_snapshot_drv = { + .commit = external_snapshot_commit, + .abort = external_snapshot_abort, + .clean = external_snapshot_clean, +}; + +static void external_snapshot_action(BlkActionState *common, Transaction *tran, + Error **errp) { int ret; int flags = 0; @@ -1436,6 +1448,8 @@ static void external_snapshot_prepare(BlkActionState *common, AioContext *aio_context; uint64_t perm, shared; + tran_add(tran, &external_snapshot_drv, state); + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar * purpose but a different set of parameters */ switch (action->type) { @@ -1588,10 +1602,9 @@ out: aio_context_release(aio_context); } -static void external_snapshot_commit(BlkActionState *common) +static void external_snapshot_commit(void *opaque) { - ExternalSnapshotState *state = - DO_UPCAST(ExternalSnapshotState, common, common); + ExternalSnapshotState *state = opaque; AioContext *aio_context; aio_context = bdrv_get_aio_context(state->old_bs); @@ -1607,10 +1620,9 @@ static void external_snapshot_commit(BlkActionState *common) aio_context_release(aio_context); } -static void external_snapshot_abort(BlkActionState *common) +static void external_snapshot_abort(void *opaque) { - ExternalSnapshotState *state = - DO_UPCAST(ExternalSnapshotState, common, common); + ExternalSnapshotState *state = opaque; if (state->new_bs) { if (state->overlay_appended) { AioContext *aio_context; @@ -1650,10 +1662,9 @@ static void external_snapshot_abort(BlkActionState *common) } } -static void external_snapshot_clean(BlkActionState *common) +static void external_snapshot_clean(void *opaque) { - ExternalSnapshotState *state = - DO_UPCAST(ExternalSnapshotState, common, common); + g_autofree ExternalSnapshotState *state = opaque; AioContext *aio_context; if (!state->old_bs) { @@ -1681,7 +1692,17 @@ static BlockJob *do_backup_common(BackupCommon *backup, AioContext *aio_context, JobTxn *txn, Error **errp); -static void drive_backup_prepare(BlkActionState *common, Error **errp) +static void drive_backup_commit(void *opaque); +static void drive_backup_abort(void *opaque); +static void drive_backup_clean(void *opaque); +TransactionActionDrv drive_backup_drv = { + .commit = drive_backup_commit, + .abort = drive_backup_abort, + .clean = drive_backup_clean, +}; + +static void drive_backup_action(BlkActionState *common, Transaction *tran, + Error **errp) { DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); DriveBackup *backup; @@ -1698,6 +1719,8 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) bool set_backing_hd = false; int ret; + tran_add(tran, &drive_backup_drv, state); + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); backup = common->action->u.drive_backup.data; @@ -1828,9 +1851,9 @@ out: aio_context_release(aio_context); } -static void drive_backup_commit(BlkActionState *common) +static void drive_backup_commit(void *opaque) { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + DriveBackupState *state = opaque; AioContext *aio_context; aio_context = bdrv_get_aio_context(state->bs); @@ -1842,18 +1865,18 @@ static void drive_backup_commit(BlkActionState *common) aio_context_release(aio_context); } -static void drive_backup_abort(BlkActionState *common) +static void drive_backup_abort(void *opaque) { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + DriveBackupState *state = opaque; if (state->job) { job_cancel_sync(&state->job->job, true); } } -static void drive_backup_clean(BlkActionState *common) +static void drive_backup_clean(void *opaque) { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + g_autofree DriveBackupState *state = opaque; AioContext *aio_context; if (!state->bs) { @@ -1874,7 +1897,17 @@ typedef struct BlockdevBackupState { BlockJob *job; } BlockdevBackupState; -static void blockdev_backup_prepare(BlkActionState *common, Error **errp) +static void blockdev_backup_commit(void *opaque); +static void blockdev_backup_abort(void *opaque); +static void blockdev_backup_clean(void *opaque); +TransactionActionDrv blockdev_backup_drv = { + .commit = blockdev_backup_commit, + .abort = blockdev_backup_abort, + .clean = blockdev_backup_clean, +}; + +static void blockdev_backup_action(BlkActionState *common, Transaction *tran, + Error **errp) { BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); BlockdevBackup *backup; @@ -1884,6 +1917,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) AioContext *old_context; int ret; + tran_add(tran, &blockdev_backup_drv, state); + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); backup = common->action->u.blockdev_backup.data; @@ -1922,9 +1957,9 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) aio_context_release(aio_context); } -static void blockdev_backup_commit(BlkActionState *common) +static void blockdev_backup_commit(void *opaque) { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackupState *state = opaque; AioContext *aio_context; aio_context = bdrv_get_aio_context(state->bs); @@ -1936,18 +1971,18 @@ static void blockdev_backup_commit(BlkActionState *common) aio_context_release(aio_context); } -static void blockdev_backup_abort(BlkActionState *common) +static void blockdev_backup_abort(void *opaque) { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackupState *state = opaque; if (state->job) { job_cancel_sync(&state->job->job, true); } } -static void blockdev_backup_clean(BlkActionState *common) +static void blockdev_backup_clean(void *opaque) { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + g_autofree BlockdevBackupState *state = opaque; AioContext *aio_context; if (!state->bs) { @@ -1971,14 +2006,22 @@ typedef struct BlockDirtyBitmapState { bool was_enabled; } BlockDirtyBitmapState; -static void block_dirty_bitmap_add_prepare(BlkActionState *common, - Error **errp) +static void block_dirty_bitmap_add_abort(void *opaque); +TransactionActionDrv block_dirty_bitmap_add_drv = { + .abort = block_dirty_bitmap_add_abort, + .clean = g_free, +}; + +static void block_dirty_bitmap_add_action(BlkActionState *common, + Transaction *tran, Error **errp) { Error *local_err = NULL; BlockDirtyBitmapAdd *action; BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, common, common); + tran_add(tran, &block_dirty_bitmap_add_drv, state); + if (action_check_completion_mode(common, errp) < 0) { return; } @@ -1998,13 +2041,12 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common, } } -static void block_dirty_bitmap_add_abort(BlkActionState *common) +static void block_dirty_bitmap_add_abort(void *opaque) { BlockDirtyBitmapAdd *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; - action = common->action->u.block_dirty_bitmap_add.data; + action = state->common.action->u.block_dirty_bitmap_add.data; /* Should not be able to fail: IF the bitmap was added via .prepare(), * then the node reference and bitmap name must have been valid. */ @@ -2013,13 +2055,23 @@ static void block_dirty_bitmap_add_abort(BlkActionState *common) } } -static void block_dirty_bitmap_clear_prepare(BlkActionState *common, - Error **errp) +static void block_dirty_bitmap_restore(void *opaque); +static void block_dirty_bitmap_free_backup(void *opaque); +TransactionActionDrv block_dirty_bitmap_clear_drv = { + .abort = block_dirty_bitmap_restore, + .commit = block_dirty_bitmap_free_backup, + .clean = g_free, +}; + +static void block_dirty_bitmap_clear_action(BlkActionState *common, + Transaction *tran, Error **errp) { BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, common, common); BlockDirtyBitmap *action; + tran_add(tran, &block_dirty_bitmap_clear_drv, state); + if (action_check_completion_mode(common, errp) < 0) { return; } @@ -2040,31 +2092,37 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common, bdrv_clear_dirty_bitmap(state->bitmap, &state->backup); } -static void block_dirty_bitmap_restore(BlkActionState *common) +static void block_dirty_bitmap_restore(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; if (state->backup) { bdrv_restore_dirty_bitmap(state->bitmap, state->backup); } } -static void block_dirty_bitmap_free_backup(BlkActionState *common) +static void block_dirty_bitmap_free_backup(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; hbitmap_free(state->backup); } -static void block_dirty_bitmap_enable_prepare(BlkActionState *common, - Error **errp) +static void block_dirty_bitmap_enable_abort(void *opaque); +TransactionActionDrv block_dirty_bitmap_enable_drv = { + .abort = block_dirty_bitmap_enable_abort, + .clean = g_free, +}; + +static void block_dirty_bitmap_enable_action(BlkActionState *common, + Transaction *tran, Error **errp) { BlockDirtyBitmap *action; BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, common, common); + tran_add(tran, &block_dirty_bitmap_enable_drv, state); + if (action_check_completion_mode(common, errp) < 0) { return; } @@ -2086,23 +2144,30 @@ static void block_dirty_bitmap_enable_prepare(BlkActionState *common, bdrv_enable_dirty_bitmap(state->bitmap); } -static void block_dirty_bitmap_enable_abort(BlkActionState *common) +static void block_dirty_bitmap_enable_abort(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; if (!state->was_enabled) { bdrv_disable_dirty_bitmap(state->bitmap); } } -static void block_dirty_bitmap_disable_prepare(BlkActionState *common, - Error **errp) +static void block_dirty_bitmap_disable_abort(void *opaque); +TransactionActionDrv block_dirty_bitmap_disable_drv = { + .abort = block_dirty_bitmap_disable_abort, + .clean = g_free, +}; + +static void block_dirty_bitmap_disable_action(BlkActionState *common, + Transaction *tran, Error **errp) { BlockDirtyBitmap *action; BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, common, common); + tran_add(tran, &block_dirty_bitmap_disable_drv, state); + if (action_check_completion_mode(common, errp) < 0) { return; } @@ -2124,23 +2189,30 @@ static void block_dirty_bitmap_disable_prepare(BlkActionState *common, bdrv_disable_dirty_bitmap(state->bitmap); } -static void block_dirty_bitmap_disable_abort(BlkActionState *common) +static void block_dirty_bitmap_disable_abort(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; if (state->was_enabled) { bdrv_enable_dirty_bitmap(state->bitmap); } } -static void block_dirty_bitmap_merge_prepare(BlkActionState *common, - Error **errp) +TransactionActionDrv block_dirty_bitmap_merge_drv = { + .commit = block_dirty_bitmap_free_backup, + .abort = block_dirty_bitmap_restore, + .clean = g_free, +}; + +static void block_dirty_bitmap_merge_action(BlkActionState *common, + Transaction *tran, Error **errp) { BlockDirtyBitmapMerge *action; BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, common, common); + tran_add(tran, &block_dirty_bitmap_merge_drv, state); + if (action_check_completion_mode(common, errp) < 0) { return; } @@ -2152,13 +2224,23 @@ static void block_dirty_bitmap_merge_prepare(BlkActionState *common, errp); } -static void block_dirty_bitmap_remove_prepare(BlkActionState *common, - Error **errp) +static void block_dirty_bitmap_remove_commit(void *opaque); +static void block_dirty_bitmap_remove_abort(void *opaque); +TransactionActionDrv block_dirty_bitmap_remove_drv = { + .commit = block_dirty_bitmap_remove_commit, + .abort = block_dirty_bitmap_remove_abort, + .clean = g_free, +}; + +static void block_dirty_bitmap_remove_action(BlkActionState *common, + Transaction *tran, Error **errp) { BlockDirtyBitmap *action; BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, common, common); + tran_add(tran, &block_dirty_bitmap_remove_drv, state); + if (action_check_completion_mode(common, errp) < 0) { return; } @@ -2173,10 +2255,9 @@ static void block_dirty_bitmap_remove_prepare(BlkActionState *common, } } -static void block_dirty_bitmap_remove_abort(BlkActionState *common) +static void block_dirty_bitmap_remove_abort(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; if (state->bitmap) { bdrv_dirty_bitmap_skip_store(state->bitmap, false); @@ -2184,21 +2265,28 @@ static void block_dirty_bitmap_remove_abort(BlkActionState *common) } } -static void block_dirty_bitmap_remove_commit(BlkActionState *common) +static void block_dirty_bitmap_remove_commit(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; bdrv_dirty_bitmap_set_busy(state->bitmap, false); bdrv_release_dirty_bitmap(state->bitmap); } -static void abort_prepare(BlkActionState *common, Error **errp) +static void abort_commit(void *opaque); +TransactionActionDrv abort_drv = { + .commit = abort_commit, + .clean = g_free, +}; + +static void abort_action(BlkActionState *common, Transaction *tran, + Error **errp) { + tran_add(tran, &abort_drv, common); error_setg(errp, "Transaction aborted using Abort action"); } -static void abort_commit(BlkActionState *common) +static void abort_commit(void *opaque) { g_assert_not_reached(); /* this action never succeeds */ } @@ -2206,75 +2294,51 @@ static void abort_commit(BlkActionState *common) static const BlkActionOps actions[] = { [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = { .instance_size = sizeof(ExternalSnapshotState), - .prepare = external_snapshot_prepare, - .commit = external_snapshot_commit, - .abort = external_snapshot_abort, - .clean = external_snapshot_clean, + .action = external_snapshot_action, }, [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { .instance_size = sizeof(ExternalSnapshotState), - .prepare = external_snapshot_prepare, - .commit = external_snapshot_commit, - .abort = external_snapshot_abort, - .clean = external_snapshot_clean, + .action = external_snapshot_action, }, [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = { .instance_size = sizeof(DriveBackupState), - .prepare = drive_backup_prepare, - .commit = drive_backup_commit, - .abort = drive_backup_abort, - .clean = drive_backup_clean, + .action = drive_backup_action, }, [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = { .instance_size = sizeof(BlockdevBackupState), - .prepare = blockdev_backup_prepare, - .commit = blockdev_backup_commit, - .abort = blockdev_backup_abort, - .clean = blockdev_backup_clean, + .action = blockdev_backup_action, }, [TRANSACTION_ACTION_KIND_ABORT] = { .instance_size = sizeof(BlkActionState), - .prepare = abort_prepare, - .commit = abort_commit, + .action = abort_action, }, [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = { .instance_size = sizeof(InternalSnapshotState), - .prepare = internal_snapshot_prepare, - .abort = internal_snapshot_abort, - .clean = internal_snapshot_clean, + .action = internal_snapshot_action, }, [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = { .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_add_prepare, - .abort = block_dirty_bitmap_add_abort, + .action = block_dirty_bitmap_add_action, }, [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = { .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_clear_prepare, - .commit = block_dirty_bitmap_free_backup, - .abort = block_dirty_bitmap_restore, + .action = block_dirty_bitmap_clear_action, }, [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = { .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_enable_prepare, - .abort = block_dirty_bitmap_enable_abort, + .action = block_dirty_bitmap_enable_action, }, [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = { .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_disable_prepare, - .abort = block_dirty_bitmap_disable_abort, + .action = block_dirty_bitmap_disable_action, }, [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = { .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_merge_prepare, - .commit = block_dirty_bitmap_free_backup, - .abort = block_dirty_bitmap_restore, + .action = block_dirty_bitmap_merge_action, }, [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = { .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_remove_prepare, - .commit = block_dirty_bitmap_remove_commit, - .abort = block_dirty_bitmap_remove_abort, + .action = block_dirty_bitmap_remove_action, }, /* Where are transactions for MIRROR, COMMIT and STREAM? * Although these blockjobs use transaction callbacks like the backup job, @@ -2316,14 +2380,11 @@ void qmp_transaction(TransactionActionList *dev_list, TransactionActionList *dev_entry = dev_list; bool has_props = !!props; JobTxn *block_job_txn = NULL; - BlkActionState *state, *next; Error *local_err = NULL; + Transaction *tran = tran_new(); GLOBAL_STATE_CODE(); - QTAILQ_HEAD(, BlkActionState) snap_bdrv_states; - QTAILQ_INIT(&snap_bdrv_states); - /* Does this transaction get canceled as a group on failure? * If not, we don't really need to make a JobTxn. */ @@ -2339,6 +2400,7 @@ void qmp_transaction(TransactionActionList *dev_list, while (NULL != dev_entry) { TransactionAction *dev_info = NULL; const BlkActionOps *ops; + BlkActionState *state; dev_info = dev_entry->value; dev_entry = dev_entry->next; @@ -2353,38 +2415,23 @@ void qmp_transaction(TransactionActionList *dev_list, state->action = dev_info; state->block_job_txn = block_job_txn; state->txn_props = props; - QTAILQ_INSERT_TAIL(&snap_bdrv_states, state, entry); - state->ops->prepare(state, &local_err); + state->ops->action(state, tran, &local_err); if (local_err) { error_propagate(errp, local_err); goto delete_and_fail; } } - QTAILQ_FOREACH(state, &snap_bdrv_states, entry) { - if (state->ops->commit) { - state->ops->commit(state); - } - } + tran_commit(tran); /* success */ goto exit; delete_and_fail: /* failure, and it is all-or-none; roll back all operations */ - QTAILQ_FOREACH_REVERSE(state, &snap_bdrv_states, entry) { - if (state->ops->abort) { - state->ops->abort(state); - } - } + tran_abort(tran); exit: - QTAILQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) { - if (state->ops->clean) { - state->ops->clean(state); - } - g_free(state); - } if (!has_props) { qapi_free_TransactionProperties(props); } From 240396965fc653756aed90edc2985f05783c5ad6 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 10 May 2023 18:06:20 +0300 Subject: [PATCH 02/21] blockdev: transactions: rename some things Look at qmp_transaction(): dev_list is not obvious name for list of actions. Let's look at qapi spec, this argument is "actions". Let's follow the common practice of using same argument names in qapi scheme and code. To be honest, rename props to properties for same reason. Next, we have to rename global map of actions, to not conflict with new name for function argument. Rename also dev_entry loop variable accordingly to new name of the list. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Kevin Wolf Message-Id: <20230510150624.310640-3-vsementsov@yandex-team.ru> Signed-off-by: Kevin Wolf --- blockdev.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/blockdev.c b/blockdev.c index 3c72d40f51..75e313f403 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2291,7 +2291,7 @@ static void abort_commit(void *opaque) g_assert_not_reached(); /* this action never succeeds */ } -static const BlkActionOps actions[] = { +static const BlkActionOps actions_map[] = { [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = { .instance_size = sizeof(ExternalSnapshotState), .action = external_snapshot_action, @@ -2373,12 +2373,12 @@ static TransactionProperties *get_transaction_properties( * * Always run under BQL. */ -void qmp_transaction(TransactionActionList *dev_list, - struct TransactionProperties *props, +void qmp_transaction(TransactionActionList *actions, + struct TransactionProperties *properties, Error **errp) { - TransactionActionList *dev_entry = dev_list; - bool has_props = !!props; + TransactionActionList *act = actions; + bool has_properties = !!properties; JobTxn *block_job_txn = NULL; Error *local_err = NULL; Transaction *tran = tran_new(); @@ -2388,8 +2388,8 @@ void qmp_transaction(TransactionActionList *dev_list, /* Does this transaction get canceled as a group on failure? * If not, we don't really need to make a JobTxn. */ - props = get_transaction_properties(props); - if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { + properties = get_transaction_properties(properties); + if (properties->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { block_job_txn = job_txn_new(); } @@ -2397,24 +2397,24 @@ void qmp_transaction(TransactionActionList *dev_list, bdrv_drain_all(); /* We don't do anything in this loop that commits us to the operations */ - while (NULL != dev_entry) { + while (NULL != act) { TransactionAction *dev_info = NULL; const BlkActionOps *ops; BlkActionState *state; - dev_info = dev_entry->value; - dev_entry = dev_entry->next; + dev_info = act->value; + act = act->next; - assert(dev_info->type < ARRAY_SIZE(actions)); + assert(dev_info->type < ARRAY_SIZE(actions_map)); - ops = &actions[dev_info->type]; + ops = &actions_map[dev_info->type]; assert(ops->instance_size > 0); state = g_malloc0(ops->instance_size); state->ops = ops; state->action = dev_info; state->block_job_txn = block_job_txn; - state->txn_props = props; + state->txn_props = properties; state->ops->action(state, tran, &local_err); if (local_err) { @@ -2432,8 +2432,8 @@ delete_and_fail: /* failure, and it is all-or-none; roll back all operations */ tran_abort(tran); exit: - if (!has_props) { - qapi_free_TransactionProperties(props); + if (!has_properties) { + qapi_free_TransactionProperties(properties); } job_txn_unref(block_job_txn); } From 30c96b555974ed0341b3c374b20a26242f1de239 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 10 May 2023 18:06:21 +0300 Subject: [PATCH 03/21] blockdev: qmp_transaction: refactor loop to classic for Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Kevin Wolf Message-Id: <20230510150624.310640-4-vsementsov@yandex-team.ru> Signed-off-by: Kevin Wolf --- blockdev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/blockdev.c b/blockdev.c index 75e313f403..dd0e98bbe1 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2377,7 +2377,7 @@ void qmp_transaction(TransactionActionList *actions, struct TransactionProperties *properties, Error **errp) { - TransactionActionList *act = actions; + TransactionActionList *act; bool has_properties = !!properties; JobTxn *block_job_txn = NULL; Error *local_err = NULL; @@ -2397,14 +2397,11 @@ void qmp_transaction(TransactionActionList *actions, bdrv_drain_all(); /* We don't do anything in this loop that commits us to the operations */ - while (NULL != act) { - TransactionAction *dev_info = NULL; + for (act = actions; act; act = act->next) { + TransactionAction *dev_info = act->value; const BlkActionOps *ops; BlkActionState *state; - dev_info = act->value; - act = act->next; - assert(dev_info->type < ARRAY_SIZE(actions_map)); ops = &actions_map[dev_info->type]; From c85f34cf89edb8e7bcdb35bc423da8a7e4b8c7ba Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 10 May 2023 18:06:22 +0300 Subject: [PATCH 04/21] blockdev: transaction: refactor handling transaction properties Only backup supports GROUPED mode. Make this logic more clear. And avoid passing extra thing to each action. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20230510150624.310640-5-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- blockdev.c | 96 +++++++++++++----------------------------------------- 1 file changed, 22 insertions(+), 74 deletions(-) diff --git a/blockdev.c b/blockdev.c index dd0e98bbe1..a2ebaa5afc 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1230,7 +1230,6 @@ struct BlkActionState { TransactionAction *action; const BlkActionOps *ops; JobTxn *block_job_txn; - TransactionProperties *txn_props; QTAILQ_ENTRY(BlkActionState) entry; }; @@ -1249,19 +1248,6 @@ TransactionActionDrv internal_snapshot_drv = { .clean = internal_snapshot_clean, }; -static int action_check_completion_mode(BlkActionState *s, Error **errp) -{ - if (s->txn_props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { - error_setg(errp, - "Action '%s' does not support Transaction property " - "completion-mode = %s", - TransactionActionKind_str(s->action->type), - ActionCompletionMode_str(s->txn_props->completion_mode)); - return -1; - } - return 0; -} - static void internal_snapshot_action(BlkActionState *common, Transaction *tran, Error **errp) { @@ -1284,15 +1270,9 @@ static void internal_snapshot_action(BlkActionState *common, tran_add(tran, &internal_snapshot_drv, state); - /* 1. parse input */ device = internal->device; name = internal->name; - /* 2. check for validation */ - if (action_check_completion_mode(common, errp) < 0) { - return; - } - bs = qmp_get_root_bs(device, errp); if (!bs) { return; @@ -1476,9 +1456,6 @@ static void external_snapshot_action(BlkActionState *common, Transaction *tran, } /* start processing */ - if (action_check_completion_mode(common, errp) < 0) { - return; - } state->old_bs = bdrv_lookup_bs(device, node_name, errp); if (!state->old_bs) { @@ -2022,10 +1999,6 @@ static void block_dirty_bitmap_add_action(BlkActionState *common, tran_add(tran, &block_dirty_bitmap_add_drv, state); - if (action_check_completion_mode(common, errp) < 0) { - return; - } - action = common->action->u.block_dirty_bitmap_add.data; /* AIO context taken and released within qmp_block_dirty_bitmap_add */ qmp_block_dirty_bitmap_add(action->node, action->name, @@ -2072,10 +2045,6 @@ static void block_dirty_bitmap_clear_action(BlkActionState *common, tran_add(tran, &block_dirty_bitmap_clear_drv, state); - if (action_check_completion_mode(common, errp) < 0) { - return; - } - action = common->action->u.block_dirty_bitmap_clear.data; state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, @@ -2123,10 +2092,6 @@ static void block_dirty_bitmap_enable_action(BlkActionState *common, tran_add(tran, &block_dirty_bitmap_enable_drv, state); - if (action_check_completion_mode(common, errp) < 0) { - return; - } - action = common->action->u.block_dirty_bitmap_enable.data; state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, @@ -2168,10 +2133,6 @@ static void block_dirty_bitmap_disable_action(BlkActionState *common, tran_add(tran, &block_dirty_bitmap_disable_drv, state); - if (action_check_completion_mode(common, errp) < 0) { - return; - } - action = common->action->u.block_dirty_bitmap_disable.data; state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, @@ -2213,10 +2174,6 @@ static void block_dirty_bitmap_merge_action(BlkActionState *common, tran_add(tran, &block_dirty_bitmap_merge_drv, state); - if (action_check_completion_mode(common, errp) < 0) { - return; - } - action = common->action->u.block_dirty_bitmap_merge.data; state->bitmap = block_dirty_bitmap_merge(action->node, action->target, @@ -2241,10 +2198,6 @@ static void block_dirty_bitmap_remove_action(BlkActionState *common, tran_add(tran, &block_dirty_bitmap_remove_drv, state); - if (action_check_completion_mode(common, errp) < 0) { - return; - } - action = common->action->u.block_dirty_bitmap_remove.data; state->bitmap = block_dirty_bitmap_remove(action->node, action->name, @@ -2348,25 +2301,6 @@ static const BlkActionOps actions_map[] = { */ }; -/** - * Allocate a TransactionProperties structure if necessary, and fill - * that structure with desired defaults if they are unset. - */ -static TransactionProperties *get_transaction_properties( - TransactionProperties *props) -{ - if (!props) { - props = g_new0(TransactionProperties, 1); - } - - if (!props->has_completion_mode) { - props->has_completion_mode = true; - props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL; - } - - return props; -} - /* * 'Atomic' group operations. The operations are performed as a set, and if * any fail then we roll back all operations in the group. @@ -2378,24 +2312,42 @@ void qmp_transaction(TransactionActionList *actions, Error **errp) { TransactionActionList *act; - bool has_properties = !!properties; JobTxn *block_job_txn = NULL; Error *local_err = NULL; - Transaction *tran = tran_new(); + Transaction *tran; + ActionCompletionMode comp_mode = + properties ? properties->completion_mode : + ACTION_COMPLETION_MODE_INDIVIDUAL; GLOBAL_STATE_CODE(); /* Does this transaction get canceled as a group on failure? * If not, we don't really need to make a JobTxn. */ - properties = get_transaction_properties(properties); - if (properties->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { + if (comp_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { + for (act = actions; act; act = act->next) { + TransactionActionKind type = act->value->type; + + if (type != TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP && + type != TRANSACTION_ACTION_KIND_DRIVE_BACKUP) + { + error_setg(errp, + "Action '%s' does not support transaction property " + "completion-mode = %s", + TransactionActionKind_str(type), + ActionCompletionMode_str(comp_mode)); + return; + } + } + block_job_txn = job_txn_new(); } /* drain all i/o before any operations */ bdrv_drain_all(); + tran = tran_new(); + /* We don't do anything in this loop that commits us to the operations */ for (act = actions; act; act = act->next) { TransactionAction *dev_info = act->value; @@ -2411,7 +2363,6 @@ void qmp_transaction(TransactionActionList *actions, state->ops = ops; state->action = dev_info; state->block_job_txn = block_job_txn; - state->txn_props = properties; state->ops->action(state, tran, &local_err); if (local_err) { @@ -2429,9 +2380,6 @@ delete_and_fail: /* failure, and it is all-or-none; roll back all operations */ tran_abort(tran); exit: - if (!has_properties) { - qapi_free_TransactionProperties(properties); - } job_txn_unref(block_job_txn); } From c85feafa98e3f7835407d39bf5bfadf13f32075f Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 10 May 2023 18:06:23 +0300 Subject: [PATCH 05/21] blockdev: use state.bitmap in block-dirty-bitmap-add action Other bitmap related actions use the .bitmap pointer in .abort action, let's do same here: 1. It helps further refactoring, as bitmap-add is the only bitmap action that uses state.action in .abort 2. It must be safe: transaction actions rely on the fact that on .abort() the state is the same as at the end of .prepare(), so that in .abort() we could precisely rollback the changes done by .prepare(). The only way to remove the bitmap during transaction should be block-dirty-bitmap-remove action, but it postpones actual removal to .commit(), so we are OK on any rollback path. (Note also that bitmap-remove is the only bitmap action that has .commit() phase, except for simple g_free the state on .clean()) 3. Again, other bitmap actions behave this way: keep the bitmap pointer during the transaction. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20230510150624.310640-6-vsementsov@yandex-team.ru> [kwolf: Also remove the now unused BlockDirtyBitmapState.prepared] Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- blockdev.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/blockdev.c b/blockdev.c index a2ebaa5afc..4bf15566b2 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1979,7 +1979,6 @@ typedef struct BlockDirtyBitmapState { BdrvDirtyBitmap *bitmap; BlockDriverState *bs; HBitmap *backup; - bool prepared; bool was_enabled; } BlockDirtyBitmapState; @@ -2008,7 +2007,8 @@ static void block_dirty_bitmap_add_action(BlkActionState *common, &local_err); if (!local_err) { - state->prepared = true; + state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, + NULL, &error_abort); } else { error_propagate(errp, local_err); } @@ -2016,15 +2016,10 @@ static void block_dirty_bitmap_add_action(BlkActionState *common, static void block_dirty_bitmap_add_abort(void *opaque) { - BlockDirtyBitmapAdd *action; BlockDirtyBitmapState *state = opaque; - action = state->common.action->u.block_dirty_bitmap_add.data; - /* Should not be able to fail: IF the bitmap was added via .prepare(), - * then the node reference and bitmap name must have been valid. - */ - if (state->prepared) { - qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort); + if (state->bitmap) { + bdrv_release_dirty_bitmap(state->bitmap); } } From d53c89aed1d25b8a9d98b3904e9226bda699adf1 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 10 May 2023 18:06:24 +0300 Subject: [PATCH 06/21] blockdev: qmp_transaction: drop extra generic layer Let's simplify things: First, actions generally don't need access to common BlkActionState structure. The only exclusion are backup actions that need block_job_txn. Next, for transaction actions of Transaction API is more native to allocated state structure in the action itself. So, do the following transformation: 1. Let all actions be represented by a function with corresponding structure as arguments. 2. Instead of array-map marshaller, let's make a function, that calls corresponding action directly. 3. BlkActionOps and BlkActionState structures become unused Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20230510150624.310640-7-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- blockdev.c | 265 +++++++++++++++++------------------------------------ 1 file changed, 85 insertions(+), 180 deletions(-) diff --git a/blockdev.c b/blockdev.c index 4bf15566b2..5d56b79df4 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1188,54 +1188,8 @@ out_aio_context: return NULL; } -/* New and old BlockDriverState structs for atomic group operations */ - -typedef struct BlkActionState BlkActionState; - -/** - * BlkActionOps: - * Table of operations that define an Action. - * - * @instance_size: Size of state struct, in bytes. - * @prepare: Prepare the work, must NOT be NULL. - * @commit: Commit the changes, can be NULL. - * @abort: Abort the changes on fail, can be NULL. - * @clean: Clean up resources after all transaction actions have called - * commit() or abort(). Can be NULL. - * - * Only prepare() may fail. In a single transaction, only one of commit() or - * abort() will be called. clean() will always be called if it is present. - * - * Always run under BQL. - */ -typedef struct BlkActionOps { - size_t instance_size; - void (*action)(BlkActionState *common, Transaction *tran, Error **errp); -} BlkActionOps; - -/** - * BlkActionState: - * Describes one Action's state within a Transaction. - * - * @action: QAPI-defined enum identifying which Action to perform. - * @ops: Table of ActionOps this Action can perform. - * @block_job_txn: Transaction which this action belongs to. - * @entry: List membership for all Actions in this Transaction. - * - * This structure must be arranged as first member in a subclassed type, - * assuming that the compiler will also arrange it to the same offsets as the - * base class. - */ -struct BlkActionState { - TransactionAction *action; - const BlkActionOps *ops; - JobTxn *block_job_txn; - QTAILQ_ENTRY(BlkActionState) entry; -}; - /* internal snapshot private data */ typedef struct InternalSnapshotState { - BlkActionState common; BlockDriverState *bs; QEMUSnapshotInfo sn; bool created; @@ -1248,7 +1202,7 @@ TransactionActionDrv internal_snapshot_drv = { .clean = internal_snapshot_clean, }; -static void internal_snapshot_action(BlkActionState *common, +static void internal_snapshot_action(BlockdevSnapshotInternal *internal, Transaction *tran, Error **errp) { Error *local_err = NULL; @@ -1258,16 +1212,10 @@ static void internal_snapshot_action(BlkActionState *common, QEMUSnapshotInfo old_sn, *sn; bool ret; int64_t rt; - BlockdevSnapshotInternal *internal; - InternalSnapshotState *state; + InternalSnapshotState *state = g_new0(InternalSnapshotState, 1); AioContext *aio_context; int ret1; - g_assert(common->action->type == - TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC); - internal = common->action->u.blockdev_snapshot_internal_sync.data; - state = DO_UPCAST(InternalSnapshotState, common, common); - tran_add(tran, &internal_snapshot_drv, state); device = internal->device; @@ -1393,7 +1341,6 @@ static void internal_snapshot_clean(void *opaque) /* external snapshot private data */ typedef struct ExternalSnapshotState { - BlkActionState common; BlockDriverState *old_bs; BlockDriverState *new_bs; bool overlay_appended; @@ -1408,8 +1355,8 @@ TransactionActionDrv external_snapshot_drv = { .clean = external_snapshot_clean, }; -static void external_snapshot_action(BlkActionState *common, Transaction *tran, - Error **errp) +static void external_snapshot_action(TransactionAction *action, + Transaction *tran, Error **errp) { int ret; int flags = 0; @@ -1422,9 +1369,7 @@ static void external_snapshot_action(BlkActionState *common, Transaction *tran, const char *snapshot_ref; /* File name of the new image (for 'blockdev-snapshot-sync') */ const char *new_image_file; - ExternalSnapshotState *state = - DO_UPCAST(ExternalSnapshotState, common, common); - TransactionAction *action = common->action; + ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1); AioContext *aio_context; uint64_t perm, shared; @@ -1658,7 +1603,6 @@ static void external_snapshot_clean(void *opaque) } typedef struct DriveBackupState { - BlkActionState common; BlockDriverState *bs; BlockJob *job; } DriveBackupState; @@ -1678,11 +1622,11 @@ TransactionActionDrv drive_backup_drv = { .clean = drive_backup_clean, }; -static void drive_backup_action(BlkActionState *common, Transaction *tran, - Error **errp) +static void drive_backup_action(DriveBackup *backup, + JobTxn *block_job_txn, + Transaction *tran, Error **errp) { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); - DriveBackup *backup; + DriveBackupState *state = g_new0(DriveBackupState, 1); BlockDriverState *bs; BlockDriverState *target_bs; BlockDriverState *source = NULL; @@ -1698,9 +1642,6 @@ static void drive_backup_action(BlkActionState *common, Transaction *tran, tran_add(tran, &drive_backup_drv, state); - assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->u.drive_backup.data; - if (!backup->has_mode) { backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; } @@ -1820,7 +1761,7 @@ static void drive_backup_action(BlkActionState *common, Transaction *tran, state->job = do_backup_common(qapi_DriveBackup_base(backup), bs, target_bs, aio_context, - common->block_job_txn, errp); + block_job_txn, errp); unref: bdrv_unref(target_bs); @@ -1869,7 +1810,6 @@ static void drive_backup_clean(void *opaque) } typedef struct BlockdevBackupState { - BlkActionState common; BlockDriverState *bs; BlockJob *job; } BlockdevBackupState; @@ -1883,11 +1823,11 @@ TransactionActionDrv blockdev_backup_drv = { .clean = blockdev_backup_clean, }; -static void blockdev_backup_action(BlkActionState *common, Transaction *tran, - Error **errp) +static void blockdev_backup_action(BlockdevBackup *backup, + JobTxn *block_job_txn, + Transaction *tran, Error **errp) { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); - BlockdevBackup *backup; + BlockdevBackupState *state = g_new0(BlockdevBackupState, 1); BlockDriverState *bs; BlockDriverState *target_bs; AioContext *aio_context; @@ -1896,9 +1836,6 @@ static void blockdev_backup_action(BlkActionState *common, Transaction *tran, tran_add(tran, &blockdev_backup_drv, state); - assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->u.blockdev_backup.data; - bs = bdrv_lookup_bs(backup->device, backup->device, errp); if (!bs) { return; @@ -1929,7 +1866,7 @@ static void blockdev_backup_action(BlkActionState *common, Transaction *tran, state->job = do_backup_common(qapi_BlockdevBackup_base(backup), bs, target_bs, aio_context, - common->block_job_txn, errp); + block_job_txn, errp); aio_context_release(aio_context); } @@ -1975,7 +1912,6 @@ static void blockdev_backup_clean(void *opaque) } typedef struct BlockDirtyBitmapState { - BlkActionState common; BdrvDirtyBitmap *bitmap; BlockDriverState *bs; HBitmap *backup; @@ -1988,17 +1924,14 @@ TransactionActionDrv block_dirty_bitmap_add_drv = { .clean = g_free, }; -static void block_dirty_bitmap_add_action(BlkActionState *common, +static void block_dirty_bitmap_add_action(BlockDirtyBitmapAdd *action, Transaction *tran, Error **errp) { Error *local_err = NULL; - BlockDirtyBitmapAdd *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); tran_add(tran, &block_dirty_bitmap_add_drv, state); - action = common->action->u.block_dirty_bitmap_add.data; /* AIO context taken and released within qmp_block_dirty_bitmap_add */ qmp_block_dirty_bitmap_add(action->node, action->name, action->has_granularity, action->granularity, @@ -2031,16 +1964,13 @@ TransactionActionDrv block_dirty_bitmap_clear_drv = { .clean = g_free, }; -static void block_dirty_bitmap_clear_action(BlkActionState *common, +static void block_dirty_bitmap_clear_action(BlockDirtyBitmap *action, Transaction *tran, Error **errp) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); - BlockDirtyBitmap *action; + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); tran_add(tran, &block_dirty_bitmap_clear_drv, state); - action = common->action->u.block_dirty_bitmap_clear.data; state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, &state->bs, @@ -2078,16 +2008,13 @@ TransactionActionDrv block_dirty_bitmap_enable_drv = { .clean = g_free, }; -static void block_dirty_bitmap_enable_action(BlkActionState *common, +static void block_dirty_bitmap_enable_action(BlockDirtyBitmap *action, Transaction *tran, Error **errp) { - BlockDirtyBitmap *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); tran_add(tran, &block_dirty_bitmap_enable_drv, state); - action = common->action->u.block_dirty_bitmap_enable.data; state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, NULL, @@ -2119,16 +2046,13 @@ TransactionActionDrv block_dirty_bitmap_disable_drv = { .clean = g_free, }; -static void block_dirty_bitmap_disable_action(BlkActionState *common, +static void block_dirty_bitmap_disable_action(BlockDirtyBitmap *action, Transaction *tran, Error **errp) { - BlockDirtyBitmap *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); tran_add(tran, &block_dirty_bitmap_disable_drv, state); - action = common->action->u.block_dirty_bitmap_disable.data; state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, NULL, @@ -2160,17 +2084,13 @@ TransactionActionDrv block_dirty_bitmap_merge_drv = { .clean = g_free, }; -static void block_dirty_bitmap_merge_action(BlkActionState *common, +static void block_dirty_bitmap_merge_action(BlockDirtyBitmapMerge *action, Transaction *tran, Error **errp) { - BlockDirtyBitmapMerge *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); tran_add(tran, &block_dirty_bitmap_merge_drv, state); - action = common->action->u.block_dirty_bitmap_merge.data; - state->bitmap = block_dirty_bitmap_merge(action->node, action->target, action->bitmaps, &state->backup, errp); @@ -2184,16 +2104,13 @@ TransactionActionDrv block_dirty_bitmap_remove_drv = { .clean = g_free, }; -static void block_dirty_bitmap_remove_action(BlkActionState *common, +static void block_dirty_bitmap_remove_action(BlockDirtyBitmap *action, Transaction *tran, Error **errp) { - BlockDirtyBitmap *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); tran_add(tran, &block_dirty_bitmap_remove_drv, state); - action = common->action->u.block_dirty_bitmap_remove.data; state->bitmap = block_dirty_bitmap_remove(action->node, action->name, false, &state->bs, errp); @@ -2224,13 +2141,11 @@ static void block_dirty_bitmap_remove_commit(void *opaque) static void abort_commit(void *opaque); TransactionActionDrv abort_drv = { .commit = abort_commit, - .clean = g_free, }; -static void abort_action(BlkActionState *common, Transaction *tran, - Error **errp) +static void abort_action(Transaction *tran, Error **errp) { - tran_add(tran, &abort_drv, common); + tran_add(tran, &abort_drv, NULL); error_setg(errp, "Transaction aborted using Abort action"); } @@ -2239,62 +2154,66 @@ static void abort_commit(void *opaque) g_assert_not_reached(); /* this action never succeeds */ } -static const BlkActionOps actions_map[] = { - [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = { - .instance_size = sizeof(ExternalSnapshotState), - .action = external_snapshot_action, - }, - [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { - .instance_size = sizeof(ExternalSnapshotState), - .action = external_snapshot_action, - }, - [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = { - .instance_size = sizeof(DriveBackupState), - .action = drive_backup_action, - }, - [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = { - .instance_size = sizeof(BlockdevBackupState), - .action = blockdev_backup_action, - }, - [TRANSACTION_ACTION_KIND_ABORT] = { - .instance_size = sizeof(BlkActionState), - .action = abort_action, - }, - [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = { - .instance_size = sizeof(InternalSnapshotState), - .action = internal_snapshot_action, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .action = block_dirty_bitmap_add_action, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .action = block_dirty_bitmap_clear_action, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .action = block_dirty_bitmap_enable_action, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .action = block_dirty_bitmap_disable_action, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .action = block_dirty_bitmap_merge_action, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .action = block_dirty_bitmap_remove_action, - }, - /* Where are transactions for MIRROR, COMMIT and STREAM? +static void transaction_action(TransactionAction *act, JobTxn *block_job_txn, + Transaction *tran, Error **errp) +{ + switch (act->type) { + case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT: + case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: + external_snapshot_action(act, tran, errp); + return; + case TRANSACTION_ACTION_KIND_DRIVE_BACKUP: + drive_backup_action(act->u.drive_backup.data, + block_job_txn, tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP: + blockdev_backup_action(act->u.blockdev_backup.data, + block_job_txn, tran, errp); + return; + case TRANSACTION_ACTION_KIND_ABORT: + abort_action(tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC: + internal_snapshot_action(act->u.blockdev_snapshot_internal_sync.data, + tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD: + block_dirty_bitmap_add_action(act->u.block_dirty_bitmap_add.data, + tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR: + block_dirty_bitmap_clear_action(act->u.block_dirty_bitmap_clear.data, + tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE: + block_dirty_bitmap_enable_action(act->u.block_dirty_bitmap_enable.data, + tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE: + block_dirty_bitmap_disable_action( + act->u.block_dirty_bitmap_disable.data, tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE: + block_dirty_bitmap_merge_action(act->u.block_dirty_bitmap_merge.data, + tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE: + block_dirty_bitmap_remove_action(act->u.block_dirty_bitmap_remove.data, + tran, errp); + return; + /* + * Where are transactions for MIRROR, COMMIT and STREAM? * Although these blockjobs use transaction callbacks like the backup job, * these jobs do not necessarily adhere to transaction semantics. * These jobs may not fully undo all of their actions on abort, nor do they * necessarily work in transactions with more than one job in them. */ -}; + case TRANSACTION_ACTION_KIND__MAX: + default: + g_assert_not_reached(); + }; +} + /* * 'Atomic' group operations. The operations are performed as a set, and if @@ -2345,21 +2264,7 @@ void qmp_transaction(TransactionActionList *actions, /* We don't do anything in this loop that commits us to the operations */ for (act = actions; act; act = act->next) { - TransactionAction *dev_info = act->value; - const BlkActionOps *ops; - BlkActionState *state; - - assert(dev_info->type < ARRAY_SIZE(actions_map)); - - ops = &actions_map[dev_info->type]; - assert(ops->instance_size > 0); - - state = g_malloc0(ops->instance_size); - state->ops = ops; - state->action = dev_info; - state->block_job_txn = block_job_txn; - - state->ops->action(state, tran, &local_err); + transaction_action(act->value, block_job_txn, tran, &local_err); if (local_err) { error_propagate(errp, local_err); goto delete_and_fail; From 41f8b633393021923fd555d8d94bded2f8f6f05d Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Tue, 16 May 2023 23:32:27 +0900 Subject: [PATCH 07/21] docs/interop/qcow2.txt: fix description about "zlib" clusters "zlib" clusters are actually raw deflate (RFC1951) clusters without zlib headers. Signed-off-by: Akihiro Suda Message-Id: <168424874322.11954.1340942046351859521-0@git.sr.ht> Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- docs/interop/qcow2.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt index f7dc304ff6..e7f036c286 100644 --- a/docs/interop/qcow2.txt +++ b/docs/interop/qcow2.txt @@ -214,14 +214,18 @@ version 2. type. If the incompatible bit "Compression type" is set: the field - must be present and non-zero (which means non-zlib + must be present and non-zero (which means non-deflate compression type). Otherwise, this field must not be present - or must be zero (which means zlib). + or must be zero (which means deflate). Available compression type values: - 0: zlib + 0: deflate 1: zstd + The deflate compression type is called "zlib" + in QEMU. However, clusters with the + deflate compression type do not have zlib headers. + === Header padding === From 4db7ba3b87447fd06cd7e23dab69fdae6011496d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 10 May 2023 22:35:54 +0200 Subject: [PATCH 08/21] block: Call .bdrv_co_create(_opts) unlocked These are functions that modify the graph, so they must be able to take a writer lock. This is impossible if they already hold the reader lock. If they need a reader lock for some of their operations, they should take it internally. Many of them go through blk_*(), which will always take the lock itself. Direct calls of bdrv_*() need to take the reader lock. Note that while locking for bdrv_co_*() calls is checked by TSA, this is not the case for the mixed_coroutine_fns bdrv_*(). Holding the lock is still required when they are called from coroutine context like here! This effectively reverts 4ec8df0183, but adds some internal locking instead. Signed-off-by: Kevin Wolf Message-Id: <20230510203601.418015-2-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 1 - block/create.c | 1 - block/crypto.c | 25 ++++++++++---------- block/parallels.c | 6 ++--- block/qcow.c | 6 ++--- block/qcow2.c | 37 +++++++++++++++++++----------- block/qed.c | 6 ++--- block/raw-format.c | 2 +- block/vdi.c | 11 +++++---- block/vhdx.c | 8 ++++--- block/vmdk.c | 27 ++++++++++------------ block/vpc.c | 6 ++--- include/block/block-global-state.h | 8 +++---- include/block/block_int-common.h | 4 ++-- 14 files changed, 78 insertions(+), 70 deletions(-) diff --git a/block.c b/block.c index f04a6ad4e8..a2f8d5a0c0 100644 --- a/block.c +++ b/block.c @@ -533,7 +533,6 @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, int ret; GLOBAL_STATE_CODE(); ERRP_GUARD(); - assert_bdrv_graph_readable(); if (!drv->bdrv_co_create_opts) { error_setg(errp, "Driver '%s' does not support image creation", diff --git a/block/create.c b/block/create.c index bf67b9947c..6b23a21675 100644 --- a/block/create.c +++ b/block/create.c @@ -43,7 +43,6 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp) int ret; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD(); job_progress_set_remaining(&s->common, 1); ret = s->drv->bdrv_co_create(s->opts, errp); diff --git a/block/crypto.c b/block/crypto.c index 30093cff9b..6ee8d46d30 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -99,12 +99,10 @@ struct BlockCryptoCreateData { }; -static int block_crypto_create_write_func(QCryptoBlock *block, - size_t offset, - const uint8_t *buf, - size_t buflen, - void *opaque, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +block_crypto_create_write_func(QCryptoBlock *block, size_t offset, + const uint8_t *buf, size_t buflen, void *opaque, + Error **errp) { struct BlockCryptoCreateData *data = opaque; ssize_t ret; @@ -117,10 +115,9 @@ static int block_crypto_create_write_func(QCryptoBlock *block, return 0; } -static int block_crypto_create_init_func(QCryptoBlock *block, - size_t headerlen, - void *opaque, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +block_crypto_create_init_func(QCryptoBlock *block, size_t headerlen, + void *opaque, Error **errp) { struct BlockCryptoCreateData *data = opaque; Error *local_error = NULL; @@ -314,7 +311,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, } -static int coroutine_fn +static int coroutine_fn GRAPH_UNLOCKED block_crypto_co_create_generic(BlockDriverState *bs, int64_t size, QCryptoBlockCreateOptions *opts, PreallocMode prealloc, Error **errp) @@ -627,7 +624,7 @@ static int block_crypto_open_luks(BlockDriverState *bs, bs, options, flags, errp); } -static int coroutine_fn +static int coroutine_fn GRAPH_UNLOCKED block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp) { BlockdevCreateOptionsLUKS *luks_opts; @@ -665,7 +662,7 @@ fail: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { @@ -727,7 +724,9 @@ fail: * beforehand, it has been truncated and corrupted in the process. */ if (ret) { + bdrv_graph_co_rdlock(); bdrv_co_delete_file_noerr(bs); + bdrv_graph_co_rdunlock(); } bdrv_co_unref(bs); diff --git a/block/parallels.c b/block/parallels.c index b49c35929e..d8a3f13e24 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -522,8 +522,8 @@ out: } -static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +parallels_co_create(BlockdevCreateOptions* opts, Error **errp) { BlockdevCreateOptionsParallels *parallels_opts; BlockDriverState *bs; @@ -622,7 +622,7 @@ exit: goto out; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED parallels_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/qcow.c b/block/qcow.c index a0c701f578..3644bbf5cb 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -800,8 +800,8 @@ static void qcow_close(BlockDriverState *bs) error_free(s->migration_blocker); } -static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +qcow_co_create(BlockdevCreateOptions *opts, Error **errp) { BlockdevCreateOptionsQcow *qcow_opts; int header_size, backing_filename_len, l1_size, shift, i; @@ -921,7 +921,7 @@ exit: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED qcow_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/qcow2.c b/block/qcow2.c index 5bde3b8401..73f36447f9 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -118,8 +118,9 @@ static int qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset, } -static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, - void *opaque, Error **errp) +static int coroutine_fn GRAPH_RDLOCK +qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, void *opaque, + Error **errp) { BlockDriverState *bs = opaque; BDRVQcow2State *s = bs->opaque; @@ -144,9 +145,7 @@ static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, */ clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0); - ret = bdrv_pwrite_zeroes(bs->file, - ret, - clusterlen, 0); + ret = bdrv_co_pwrite_zeroes(bs->file, ret, clusterlen, 0); if (ret < 0) { error_setg_errno(errp, -ret, "Could not zero fill encryption header"); return -1; @@ -156,9 +155,11 @@ static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, } -static int qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, - const uint8_t *buf, size_t buflen, - void *opaque, Error **errp) +/* The graph lock must be held when called in coroutine context */ +static int coroutine_mixed_fn +qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, + const uint8_t *buf, size_t buflen, + void *opaque, Error **errp) { BlockDriverState *bs = opaque; BDRVQcow2State *s = bs->opaque; @@ -3137,9 +3138,10 @@ static int qcow2_change_backing_file(BlockDriverState *bs, return qcow2_update_header(bs); } -static int qcow2_set_up_encryption(BlockDriverState *bs, - QCryptoBlockCreateOptions *cryptoopts, - Error **errp) +static int coroutine_fn GRAPH_RDLOCK +qcow2_set_up_encryption(BlockDriverState *bs, + QCryptoBlockCreateOptions *cryptoopts, + Error **errp) { BDRVQcow2State *s = bs->opaque; QCryptoBlock *crypto = NULL; @@ -3426,7 +3428,7 @@ static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, return refcount_bits; } -static int coroutine_fn +static int coroutine_fn GRAPH_UNLOCKED qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) { BlockdevCreateOptionsQcow2 *qcow2_opts; @@ -3724,8 +3726,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) goto out; } + bdrv_graph_co_rdlock(); ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); if (ret < 0) { + bdrv_graph_co_rdunlock(); error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " "header and refcount table"); goto out; @@ -3743,6 +3747,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) /* Create a full header (including things like feature table) */ ret = qcow2_update_header(blk_bs(blk)); + bdrv_graph_co_rdunlock(); + if (ret < 0) { error_setg_errno(errp, -ret, "Could not update qcow2 header"); goto out; @@ -3776,7 +3782,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) /* Want encryption? There you go. */ if (qcow2_opts->encrypt) { + bdrv_graph_co_rdlock(); ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp); + bdrv_graph_co_rdunlock(); + if (ret < 0) { goto out; } @@ -3813,7 +3822,7 @@ out: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { @@ -3933,8 +3942,10 @@ qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, ret = qcow2_co_create(create_options, errp); finish: if (ret < 0) { + bdrv_graph_co_rdlock(); bdrv_co_delete_file_noerr(bs); bdrv_co_delete_file_noerr(data_bs); + bdrv_graph_co_rdunlock(); } else { ret = 0; } diff --git a/block/qed.c b/block/qed.c index be9ff0fb34..9a0350b534 100644 --- a/block/qed.c +++ b/block/qed.c @@ -630,8 +630,8 @@ static void bdrv_qed_close(BlockDriverState *bs) qemu_vfree(s->l1_table); } -static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +bdrv_qed_co_create(BlockdevCreateOptions *opts, Error **errp) { BlockdevCreateOptionsQed *qed_opts; BlockBackend *blk = NULL; @@ -751,7 +751,7 @@ out: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/raw-format.c b/block/raw-format.c index 3a3946213f..918fe4fb7e 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -457,7 +457,7 @@ static int raw_has_zero_init(BlockDriverState *bs) return bdrv_has_zero_init(bs->file->bs); } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED raw_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/vdi.c b/block/vdi.c index 08331d2dd7..6c35309e04 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -734,8 +734,9 @@ nonallocating_write: return ret; } -static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, - size_t block_size, Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +vdi_co_do_create(BlockdevCreateOptions *create_options, size_t block_size, + Error **errp) { BlockdevCreateOptionsVdi *vdi_opts; int ret = 0; @@ -892,13 +893,13 @@ exit: return ret; } -static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +vdi_co_create(BlockdevCreateOptions *create_options, Error **errp) { return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp); } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vdi_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/vhdx.c b/block/vhdx.c index b20b1edf11..89913cba87 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1506,7 +1506,7 @@ exit: * There are 2 headers, and the highest sequence number will represent * the active header */ -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size, uint32_t log_size) { @@ -1515,6 +1515,8 @@ vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size, int ret = 0; VHDXHeader *hdr = NULL; + GRAPH_RDLOCK_GUARD(); + hdr = g_new0(VHDXHeader, 1); hdr->signature = VHDX_HEADER_SIGNATURE; @@ -1898,7 +1900,7 @@ exit: * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. * 1MB */ -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vhdx_co_create(BlockdevCreateOptions *opts, Error **errp) { BlockdevCreateOptionsVhdx *vhdx_opts; @@ -2060,7 +2062,7 @@ delete_and_exit: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vhdx_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/vmdk.c b/block/vmdk.c index fddbd1c86c..e3e86608ec 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2165,10 +2165,9 @@ vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, return ret; } -static int vmdk_init_extent(BlockBackend *blk, - int64_t filesize, bool flat, - bool compress, bool zeroed_grain, - Error **errp) +static int GRAPH_UNLOCKED +vmdk_init_extent(BlockBackend *blk, int64_t filesize, bool flat, bool compress, + bool zeroed_grain, Error **errp) { int ret, i; VMDK4Header header; @@ -2277,7 +2276,7 @@ exit: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vmdk_create_extent(const char *filename, int64_t filesize, bool flat, bool compress, bool zeroed_grain, BlockBackend **pbb, QemuOpts *opts, Error **errp) @@ -2358,7 +2357,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, * non-split format. * idx >= 1: get the n-th extent if in a split subformat */ -typedef BlockBackend * coroutine_fn /* GRAPH_RDLOCK */ +typedef BlockBackend * coroutine_fn GRAPH_UNLOCKED_PTR (*vmdk_create_extent_fn)(int64_t size, int idx, bool flat, bool split, bool compress, bool zeroed_grain, void *opaque, Error **errp); @@ -2374,7 +2373,7 @@ static void vmdk_desc_add_extent(GString *desc, g_free(basename); } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vmdk_co_do_create(int64_t size, BlockdevVmdkSubformat subformat, BlockdevVmdkAdapterType adapter_type, @@ -2605,7 +2604,7 @@ typedef struct { QemuOpts *opts; } VMDKCreateOptsData; -static BlockBackend * coroutine_fn GRAPH_RDLOCK +static BlockBackend * coroutine_fn GRAPH_UNLOCKED vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split, bool compress, bool zeroed_grain, void *opaque, Error **errp) @@ -2647,7 +2646,7 @@ exit: return blk; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vmdk_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { @@ -2756,11 +2755,9 @@ exit: return ret; } -static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, - bool flat, bool split, - bool compress, - bool zeroed_grain, - void *opaque, Error **errp) +static BlockBackend * coroutine_fn GRAPH_UNLOCKED +vmdk_co_create_cb(int64_t size, int idx, bool flat, bool split, bool compress, + bool zeroed_grain, void *opaque, Error **errp) { int ret; BlockDriverState *bs; @@ -2809,7 +2806,7 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, return blk; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vmdk_co_create(BlockdevCreateOptions *create_options, Error **errp) { BlockdevCreateOptionsVmdk *opts; diff --git a/block/vpc.c b/block/vpc.c index 07ddda5b99..7ee7c7b4e0 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -967,8 +967,8 @@ static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts, return 0; } -static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +vpc_co_create(BlockdevCreateOptions *opts, Error **errp) { BlockdevCreateOptionsVpc *vpc_opts; BlockBackend *blk = NULL; @@ -1087,7 +1087,7 @@ out: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vpc_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 2d93423d35..f347199bff 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -58,14 +58,14 @@ BlockDriver *bdrv_find_protocol(const char *filename, Error **errp); BlockDriver *bdrv_find_format(const char *format_name); -int coroutine_fn GRAPH_RDLOCK +int coroutine_fn GRAPH_UNLOCKED bdrv_co_create(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp); -int co_wrapper_bdrv_rdlock bdrv_create(BlockDriver *drv, const char *filename, - QemuOpts *opts, Error **errp); +int co_wrapper bdrv_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp); -int coroutine_fn GRAPH_RDLOCK +int coroutine_fn GRAPH_UNLOCKED bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp); BlockDriverState *bdrv_new(void); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index dbec0e3bb4..6492a1e538 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -250,10 +250,10 @@ struct BlockDriver { BlockDriverState *bs, QDict *options, int flags, Error **errp); void (*bdrv_close)(BlockDriverState *bs); - int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create)( + int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create)( BlockdevCreateOptions *opts, Error **errp); - int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create_opts)( + int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create_opts)( BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp); int (*bdrv_amend_options)(BlockDriverState *bs, From a184563778f2b8970eb93291f08108e66432a575 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 10 May 2023 22:35:55 +0200 Subject: [PATCH 09/21] block/export: Fix null pointer dereference in error path There are some error paths in blk_exp_add() that jump to 'fail:' before 'exp' is even created. So we can't just unconditionally access exp->blk. Add a NULL check, and switch from exp->blk to blk, which is available earlier, just to be extra sure that we really cover all cases where BlockDevOps could have been set for it (in practice, this only happens in drv->create() today, so this part of the change isn't strictly necessary). Fixes: Coverity CID 1509238 Fixes: de79b52604e43fdeba6cee4f5af600b62169f2d2 Signed-off-by: Kevin Wolf Message-Id: <20230510203601.418015-3-kwolf@redhat.com> Reviewed-by: Eric Blake Tested-by: Eric Blake Signed-off-by: Kevin Wolf --- block/export/export.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/export/export.c b/block/export/export.c index 62c7c22d45..a5c8f42f53 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -192,8 +192,10 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) return exp; fail: - blk_set_dev_ops(exp->blk, NULL, NULL); - blk_unref(blk); + if (blk) { + blk_set_dev_ops(blk, NULL, NULL); + blk_unref(blk); + } aio_context_release(ctx); if (exp) { g_free(exp->id); From e3e31dc87208007784b93a19f8efcdda90ea64f6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 10 May 2023 22:35:56 +0200 Subject: [PATCH 10/21] qcow2: Unlock the graph in qcow2_do_open() where necessary qcow2_do_open() calls a few no_co_wrappers that wrap functions taking the graph lock internally as a writer. Therefore, it can't hold the reader lock across these calls, it causes deadlocks. Drop the lock temporarily around the calls. Signed-off-by: Kevin Wolf Message-Id: <20230510203601.418015-4-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c index 73f36447f9..b00b4e7575 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1619,9 +1619,11 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, if (open_data_file) { /* Open external data file */ + bdrv_graph_co_rdunlock(); s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs, &child_of_bds, BDRV_CHILD_DATA, true, errp); + bdrv_graph_co_rdlock(); if (*errp) { ret = -EINVAL; goto fail; @@ -1629,10 +1631,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { if (!s->data_file && s->image_data_file) { + bdrv_graph_co_rdunlock(); s->data_file = bdrv_co_open_child(s->image_data_file, options, "data-file", bs, &child_of_bds, BDRV_CHILD_DATA, false, errp); + bdrv_graph_co_rdlock(); if (!s->data_file) { ret = -EINVAL; goto fail; @@ -1857,7 +1861,9 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, fail: g_free(s->image_data_file); if (open_data_file && has_data_file(bs)) { + bdrv_graph_co_rdunlock(); bdrv_unref_child(bs, s->data_file); + bdrv_graph_co_rdlock(); s->data_file = NULL; } g_free(s->unknown_header_fields); From 3db0c8b25c452b53aabc8efa36e655c7c02abb8f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 10 May 2023 22:35:57 +0200 Subject: [PATCH 11/21] qemu-img: Take graph lock more selectively If we take a reader lock, we can't call any functions that take a writer lock internally without causing deadlocks once the reader lock is actually enforced in the main thread, too. Take the reader lock only where it is actually needed. Signed-off-by: Kevin Wolf Message-Id: <20230510203601.418015-5-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- qemu-img.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 9f9f0a7629..27f48051b0 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2938,8 +2938,6 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts, } bs = blk_bs(blk); - GRAPH_RDLOCK_GUARD_MAINLOOP(); - /* * Note that the returned BlockGraphInfo object will not have * information about this image's backing node, because we have opened @@ -2947,7 +2945,10 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts, * duplicate the backing chain information that we obtain by walking * the chain manually here. */ + bdrv_graph_rdlock_main_loop(); bdrv_query_block_graph_info(bs, &info, &err); + bdrv_graph_rdunlock_main_loop(); + if (err) { error_report_err(err); blk_unref(blk); From 87f130bdaad68baad4216dbc97dec73ab4a2c4ef Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 10 May 2023 22:35:58 +0200 Subject: [PATCH 12/21] test-bdrv-drain: Take graph lock more selectively If we take a reader lock, we can't call any functions that take a writer lock internally without causing deadlocks once the reader lock is actually enforced in the main thread, too. Take the reader lock only where it is actually needed. Signed-off-by: Kevin Wolf Message-Id: <20230510203601.418015-6-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/unit/test-bdrv-drain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 9a4c5e59d6..ae4299ccfa 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -1004,8 +1004,6 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) void *buffer = g_malloc(65536); QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536); - GRAPH_RDLOCK_GUARD(); - /* Pretend some internal write operation from parent to child. * Important: We have to read from the child, not from the parent! * Draining works by first propagating it all up the tree to the @@ -1014,7 +1012,9 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) * everything will be drained before we go back down the tree, but * we do not want that. We want to be in the middle of draining * when this following requests returns. */ + bdrv_graph_co_rdlock(); bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0); + bdrv_graph_co_rdunlock(); g_assert_cmpint(bs->refcnt, ==, 1); From 01a10c243362e49afcb7acbd85a47eba64a6fc74 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 10 May 2023 22:35:59 +0200 Subject: [PATCH 13/21] test-bdrv-drain: Call bdrv_co_unref() in coroutine context bdrv_unref() is a no_coroutine_fn, so calling it from coroutine context is invalid. Use bdrv_co_unref() instead. Signed-off-by: Kevin Wolf Message-Id: <20230510203601.418015-7-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/unit/test-bdrv-drain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index ae4299ccfa..08bb0f9984 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -1019,7 +1019,7 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) g_assert_cmpint(bs->refcnt, ==, 1); if (!dbdd->detach_instead_of_delete) { - blk_unref(blk); + blk_co_unref(blk); } else { BdrvChild *c, *next_c; QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { From 018e5987b57589e6e9089c2d2ef31db4e7519fd5 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 10 May 2023 22:36:00 +0200 Subject: [PATCH 14/21] blockjob: Adhere to rate limit even when reentered early When jobs are sleeping, for example to enforce a given rate limit, they can be reentered early, in particular in order to get paused, to update the rate limit or to get cancelled. Before this patch, they behave in this case as if they had fully completed their rate limiting delay. This means that requests are sped up beyond their limit, violating the constraints that the user gave us. Change the block jobs to sleep in a loop until the necessary delay is completed, while still allowing cancelling them immediately as well pausing (handled by the pause point in job_sleep_ns()) and updating the rate limit. This change is also motivated by iotests cases being prone to fail because drain operations pause and unpause them so often that block jobs complete earlier than they are supposed to. In particular, the next commit would fail iotests 030 without this change. Signed-off-by: Kevin Wolf Message-Id: <20230510203601.418015-8-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/commit.c | 7 ++----- block/mirror.c | 23 ++++++++++------------- block/stream.c | 7 ++----- blockjob.c | 22 ++++++++++++++++++++-- include/block/blockjob_int.h | 14 ++++++++++---- 5 files changed, 44 insertions(+), 29 deletions(-) diff --git a/block/commit.c b/block/commit.c index 2b20fd0fd4..aa45beb0f0 100644 --- a/block/commit.c +++ b/block/commit.c @@ -116,7 +116,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp) { CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); int64_t offset; - uint64_t delay_ns = 0; int ret = 0; int64_t n = 0; /* bytes */ QEMU_AUTO_VFREE void *buf = NULL; @@ -149,7 +148,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ - job_sleep_ns(&s->common.job, delay_ns); + block_job_ratelimit_sleep(&s->common); if (job_is_cancelled(&s->common.job)) { break; } @@ -184,9 +183,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) job_progress_update(&s->common.job, n); if (copy) { - delay_ns = block_job_ratelimit_get_delay(&s->common, n); - } else { - delay_ns = 0; + block_job_ratelimit_processed_bytes(&s->common, n); } } diff --git a/block/mirror.c b/block/mirror.c index 717442ca4d..b7d92d1378 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -471,12 +471,11 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, return bytes_handled; } -static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) +static void coroutine_fn mirror_iteration(MirrorBlockJob *s) { BlockDriverState *source = s->mirror_top_bs->backing->bs; MirrorOp *pseudo_op; int64_t offset; - uint64_t delay_ns = 0, ret = 0; /* At least the first dirty chunk is mirrored in one iteration. */ int nb_chunks = 1; bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); @@ -608,16 +607,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) assert(io_bytes); offset += io_bytes; nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity); - delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct); + block_job_ratelimit_processed_bytes(&s->common, io_bytes_acct); } - ret = delay_ns; fail: QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next); qemu_co_queue_restart_all(&pseudo_op->waiting_requests); g_free(pseudo_op); - - return ret; } static void mirror_free_init(MirrorBlockJob *s) @@ -1011,7 +1007,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) assert(!s->dbi); s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap); for (;;) { - uint64_t delay_ns = 0; int64_t cnt, delta; bool should_complete; @@ -1051,7 +1046,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) mirror_wait_for_free_in_flight_slot(s); continue; } else if (cnt != 0) { - delay_ns = mirror_iteration(s); + mirror_iteration(s); } } @@ -1114,12 +1109,14 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) } if (job_is_ready(&s->common.job) && !should_complete) { - delay_ns = (s->in_flight == 0 && - cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0); + if (s->in_flight == 0 && cnt == 0) { + trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job), + BLOCK_JOB_SLICE_TIME); + job_sleep_ns(&s->common.job, BLOCK_JOB_SLICE_TIME); + } + } else { + block_job_ratelimit_sleep(&s->common); } - trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job), - delay_ns); - job_sleep_ns(&s->common.job, delay_ns); s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } diff --git a/block/stream.c b/block/stream.c index 7f9e1ecdbb..e522bbdec5 100644 --- a/block/stream.c +++ b/block/stream.c @@ -131,7 +131,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp) BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs); int64_t len; int64_t offset = 0; - uint64_t delay_ns = 0; int error = 0; int64_t n = 0; /* bytes */ @@ -155,7 +154,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp) /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ - job_sleep_ns(&s->common.job, delay_ns); + block_job_ratelimit_sleep(&s->common); if (job_is_cancelled(&s->common.job)) { break; } @@ -204,9 +203,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp) /* Publish progress */ job_progress_update(&s->common.job, n); if (copy) { - delay_ns = block_job_ratelimit_get_delay(&s->common, n); - } else { - delay_ns = 0; + block_job_ratelimit_processed_bytes(&s->common, n); } } diff --git a/blockjob.c b/blockjob.c index 659c3cb9de..913da3cbf7 100644 --- a/blockjob.c +++ b/blockjob.c @@ -319,10 +319,28 @@ static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) return block_job_set_speed_locked(job, speed, errp); } -int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n) +void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n) { IO_CODE(); - return ratelimit_calculate_delay(&job->limit, n); + ratelimit_calculate_delay(&job->limit, n); +} + +void block_job_ratelimit_sleep(BlockJob *job) +{ + uint64_t delay_ns; + + /* + * Sleep at least once. If the job is reentered early, keep waiting until + * we've waited for the full time that is necessary to keep the job at the + * right speed. + * + * Make sure to recalculate the delay after each (possibly interrupted) + * sleep because the speed can change while the job has yielded. + */ + do { + delay_ns = ratelimit_calculate_delay(&job->limit, 0); + job_sleep_ns(&job->job, delay_ns); + } while (delay_ns && !job_is_cancelled(&job->job)); } BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp) diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index f008446285..104824040c 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -126,12 +126,18 @@ void block_job_user_resume(Job *job); */ /** - * block_job_ratelimit_get_delay: + * block_job_ratelimit_processed_bytes: * - * Calculate and return delay for the next request in ns. See the documentation - * of ratelimit_calculate_delay() for details. + * To be called after some work has been done. Adjusts the delay for the next + * request. See the documentation of ratelimit_calculate_delay() for details. */ -int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n); +void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n); + +/** + * Put the job to sleep (assuming that it wasn't canceled) to throttle it to the + * right speed according to its rate limiting. + */ +void block_job_ratelimit_sleep(BlockJob *job); /** * block_job_error_action: From 71438d8dac07f28c01cf6d90fce14efe04c77824 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 10 May 2023 22:36:01 +0200 Subject: [PATCH 15/21] graph-lock: Honour read locks even in the main thread There are some conditions under which we don't actually need to do anything for taking a reader lock: Writing the graph is only possible from the main context while holding the BQL. So if a reader is running in the main context under the BQL and knows that it won't be interrupted until the next writer runs, we don't actually need to do anything. This is the case if the reader code neither has a nested event loop (this is forbidden anyway while you hold the lock) nor is a coroutine (because a writer could run when the coroutine has yielded). These conditions are exactly what bdrv_graph_rdlock_main_loop() asserts. They are not fulfilled in bdrv_graph_co_rdlock(), which always runs in a coroutine. This deletes the shortcuts in bdrv_graph_co_rdlock() that skip taking the reader lock in the main thread. Reported-by: Fiona Ebner Signed-off-by: Kevin Wolf Message-Id: <20230510203601.418015-9-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/graph-lock.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/block/graph-lock.c b/block/graph-lock.c index 377884c3a9..9c42bd9799 100644 --- a/block/graph-lock.c +++ b/block/graph-lock.c @@ -162,11 +162,6 @@ void coroutine_fn bdrv_graph_co_rdlock(void) BdrvGraphRWlock *bdrv_graph; bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; - /* Do not lock if in main thread */ - if (qemu_in_main_thread()) { - return; - } - for (;;) { qatomic_set(&bdrv_graph->reader_count, bdrv_graph->reader_count + 1); @@ -230,11 +225,6 @@ void coroutine_fn bdrv_graph_co_rdunlock(void) BdrvGraphRWlock *bdrv_graph; bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; - /* Do not lock if in main thread */ - if (qemu_in_main_thread()) { - return; - } - qatomic_store_release(&bdrv_graph->reader_count, bdrv_graph->reader_count - 1); /* make sure writer sees reader_count before we check has_writer */ From 78935fcd88ec2d26d50e45043b262f0326e6d410 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 11 May 2023 16:38:01 +0200 Subject: [PATCH 16/21] iotests/245: Check if 'compress' driver is available Skip TestBlockdevReopen.test_insert_compress_filter() if the 'compress' driver isn't available. In order to make the test succeed when the case is skipped, we also need to remove any output from it (which would be missing in the case where we skip it). This is done by replacing qemu_io_log() with qemu_io(). In case of failure, qemu_io() raises an exception with the output of the qemu-io binary in its message, so we don't actually lose anything. Signed-off-by: Kevin Wolf Message-Id: <20230511143801.255021-1-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/245 | 7 ++++--- tests/qemu-iotests/245.out | 9 +-------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 index edaf29094b..92b28c79be 100755 --- a/tests/qemu-iotests/245 +++ b/tests/qemu-iotests/245 @@ -611,6 +611,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): self.reopen(hd0_opts, {'file': 'hd0-file'}) # Insert (and remove) a compress filter + @iotests.skip_if_unsupported(['compress']) def test_insert_compress_filter(self): # Add an image to the VM: hd (raw) -> hd0 (qcow2) -> hd0-file (file) opts = {'driver': 'raw', 'node-name': 'hd', 'file': hd_opts(0)} @@ -650,9 +651,9 @@ class TestBlockdevReopen(iotests.QMPTestCase): # Check the first byte of the first three L2 entries and verify that # the second one is compressed (0x40) while the others are not (0x80) - iotests.qemu_io_log('-f', 'raw', '-c', 'read -P 0x80 0x40000 1', - '-c', 'read -P 0x40 0x40008 1', - '-c', 'read -P 0x80 0x40010 1', hd_path[0]) + iotests.qemu_io('-f', 'raw', '-c', 'read -P 0x80 0x40000 1', + '-c', 'read -P 0x40 0x40008 1', + '-c', 'read -P 0x80 0x40010 1', hd_path[0]) # Swap the disk images of two active block devices def test_swap_files(self): diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out index a4e04a3266..0970ced62a 100644 --- a/tests/qemu-iotests/245.out +++ b/tests/qemu-iotests/245.out @@ -10,14 +10,7 @@ {"return": {}} {"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} {"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -....read 1/1 bytes at offset 262144 -1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 1/1 bytes at offset 262152 -1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 1/1 bytes at offset 262160 -1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - -................ +.................... ---------------------------------------------------------------------- Ran 26 tests From 6d740fb01b9f0f5ea7a82f4d5e458d91940a19ee Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 May 2023 14:41:33 -0400 Subject: [PATCH 17/21] aio-posix: do not nest poll handlers QEMU's event loop supports nesting, which means that event handler functions may themselves call aio_poll(). The condition that triggered a handler must be reset before the nested aio_poll() call, otherwise the same handler will be called and immediately re-enter aio_poll. This leads to an infinite loop and stack exhaustion. Poll handlers are especially prone to this issue, because they typically reset their condition by finishing the processing of pending work. Unfortunately it is during the processing of pending work that nested aio_poll() calls typically occur and the condition has not yet been reset. Disable a poll handler during ->io_poll_ready() so that a nested aio_poll() call cannot invoke ->io_poll_ready() again. As a result, the disabled poll handler and its associated fd handler do not run during the nested aio_poll(). Calling aio_set_fd_handler() from inside nested aio_poll() could cause it to run again. If the fd handler is pending inside nested aio_poll(), then it will also run again. In theory fd handlers can be affected by the same issue, but they are more likely to reset the condition before calling nested aio_poll(). This is a special case and it's somewhat complex, but I don't see a way around it as long as nested aio_poll() is supported. Cc: qemu-stable@nongnu.org Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2186181 Fixes: c38270692593 ("block: Mark bdrv_co_io_(un)plug() and callers GRAPH_RDLOCK") Cc: Kevin Wolf Cc: Emanuele Giuseppe Esposito Cc: Paolo Bonzini Signed-off-by: Stefan Hajnoczi Message-Id: <20230502184134.534703-2-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- util/aio-posix.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/util/aio-posix.c b/util/aio-posix.c index a8be940f76..34bc2a64d8 100644 --- a/util/aio-posix.c +++ b/util/aio-posix.c @@ -353,8 +353,19 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node) poll_ready && revents == 0 && aio_node_check(ctx, node->is_external) && node->io_poll_ready) { + /* + * Remove temporarily to avoid infinite loops when ->io_poll_ready() + * calls aio_poll() before clearing the condition that made the poll + * handler become ready. + */ + QLIST_SAFE_REMOVE(node, node_poll); + node->io_poll_ready(node->opaque); + if (!QLIST_IS_INSERTED(node, node_poll)) { + QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll); + } + /* * Return early since revents was zero. aio_notify() does not count as * progress. From 844a12a63e12b1235a8fc17f9b278929dc6eb00e Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 May 2023 14:41:34 -0400 Subject: [PATCH 18/21] tested: add test for nested aio_poll() in poll handlers Cc: qemu-stable@nongnu.org Signed-off-by: Stefan Hajnoczi Message-Id: <20230502184134.534703-3-stefanha@redhat.com> [kwolf: Restrict to CONFIG_POSIX, Windows doesn't support polling] Tested-by: Kevin Wolf Signed-off-by: Kevin Wolf --- tests/unit/meson.build | 5 +- tests/unit/test-nested-aio-poll.c | 130 ++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test-nested-aio-poll.c diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 48ae66011b..3a6314269b 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -114,7 +114,10 @@ if have_block tests += {'test-crypto-xts': [crypto, io]} endif if 'CONFIG_POSIX' in config_host - tests += {'test-image-locking': [testblock]} + tests += { + 'test-image-locking': [testblock], + 'test-nested-aio-poll': [testblock], + } endif if config_host_data.get('CONFIG_REPLICATION') tests += {'test-replication': [testblock]} diff --git a/tests/unit/test-nested-aio-poll.c b/tests/unit/test-nested-aio-poll.c new file mode 100644 index 0000000000..9bbe18b839 --- /dev/null +++ b/tests/unit/test-nested-aio-poll.c @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Test that poll handlers are not re-entrant in nested aio_poll() + * + * Copyright Red Hat + * + * Poll handlers are usually level-triggered. That means they continue firing + * until the condition is reset (e.g. a virtqueue becomes empty). If a poll + * handler calls nested aio_poll() before the condition is reset, then infinite + * recursion occurs. + * + * aio_poll() is supposed to prevent this by disabling poll handlers in nested + * aio_poll() calls. This test case checks that this is indeed what happens. + */ +#include "qemu/osdep.h" +#include "block/aio.h" +#include "qapi/error.h" + +typedef struct { + AioContext *ctx; + + /* This is the EventNotifier that drives the test */ + EventNotifier poll_notifier; + + /* This EventNotifier is only used to wake aio_poll() */ + EventNotifier dummy_notifier; + + bool nested; +} TestData; + +static void io_read(EventNotifier *notifier) +{ + fprintf(stderr, "%s %p\n", __func__, notifier); + event_notifier_test_and_clear(notifier); +} + +static bool io_poll_true(void *opaque) +{ + fprintf(stderr, "%s %p\n", __func__, opaque); + return true; +} + +static bool io_poll_false(void *opaque) +{ + fprintf(stderr, "%s %p\n", __func__, opaque); + return false; +} + +static void io_poll_ready(EventNotifier *notifier) +{ + TestData *td = container_of(notifier, TestData, poll_notifier); + + fprintf(stderr, "> %s\n", __func__); + + g_assert(!td->nested); + td->nested = true; + + /* Wake the following nested aio_poll() call */ + event_notifier_set(&td->dummy_notifier); + + /* This nested event loop must not call io_poll()/io_poll_ready() */ + g_assert(aio_poll(td->ctx, true)); + + td->nested = false; + + fprintf(stderr, "< %s\n", __func__); +} + +/* dummy_notifier never triggers */ +static void io_poll_never_ready(EventNotifier *notifier) +{ + g_assert_not_reached(); +} + +static void test(void) +{ + TestData td = { + .ctx = aio_context_new(&error_abort), + }; + + qemu_set_current_aio_context(td.ctx); + + /* Enable polling */ + aio_context_set_poll_params(td.ctx, 1000000, 2, 2, &error_abort); + + /* + * The GSource is unused but this has the side-effect of changing the fdmon + * that AioContext uses. + */ + aio_get_g_source(td.ctx); + + /* Make the event notifier active (set) right away */ + event_notifier_init(&td.poll_notifier, 1); + aio_set_event_notifier(td.ctx, &td.poll_notifier, false, + io_read, io_poll_true, io_poll_ready); + + /* This event notifier will be used later */ + event_notifier_init(&td.dummy_notifier, 0); + aio_set_event_notifier(td.ctx, &td.dummy_notifier, false, + io_read, io_poll_false, io_poll_never_ready); + + /* Consume aio_notify() */ + g_assert(!aio_poll(td.ctx, false)); + + /* + * Run the io_read() handler. This has the side-effect of activating + * polling in future aio_poll() calls. + */ + g_assert(aio_poll(td.ctx, true)); + + /* The second time around the io_poll()/io_poll_ready() handler runs */ + g_assert(aio_poll(td.ctx, true)); + + /* Run io_poll()/io_poll_ready() one more time to show it keeps working */ + g_assert(aio_poll(td.ctx, true)); + + aio_set_event_notifier(td.ctx, &td.dummy_notifier, false, + NULL, NULL, NULL); + aio_set_event_notifier(td.ctx, &td.poll_notifier, false, NULL, NULL, NULL); + event_notifier_cleanup(&td.dummy_notifier); + event_notifier_cleanup(&td.poll_notifier); + aio_context_unref(td.ctx); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/nested-aio-poll", test); + return g_test_run(); +} From 80fc5d260002432628710f8b0c7cfc7d9b97bb9d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 17 May 2023 17:28:32 +0200 Subject: [PATCH 19/21] graph-lock: Disable locking for now In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They come from callers that hold an AioContext lock, which is not allowed during polling. In theory, we could temporarily release the lock, but callers are inconsistent about whether they hold a lock, and if they do, some are also confused about which one they hold. While all of this is fixable, it's not trivial, and the best course of action for 8.0.1 is probably just disabling the graph locking code temporarily. We don't currently rely on graph locking yet. It is supposed to replace the AioContext lock eventually to enable multiqueue support, but as long as we still have the AioContext lock, it is sufficient without the graph lock. Once the AioContext lock goes away, the deadlock doesn't exist any more either and this commit can be reverted. (Of course, it can also be reverted while the AioContext lock still exists if the callers have been fixed.) Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf Message-Id: <20230517152834.277483-2-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/graph-lock.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/block/graph-lock.c b/block/graph-lock.c index 9c42bd9799..a92c6ae219 100644 --- a/block/graph-lock.c +++ b/block/graph-lock.c @@ -30,8 +30,10 @@ BdrvGraphLock graph_lock; /* Protects the list of aiocontext and orphaned_reader_count */ static QemuMutex aio_context_list_lock; +#if 0 /* Written and read with atomic operations. */ static int has_writer; +#endif /* * A reader coroutine could move from an AioContext to another. @@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx) g_free(ctx->bdrv_graph); } +#if 0 static uint32_t reader_count(void) { BdrvGraphRWlock *brdv_graph; @@ -105,10 +108,17 @@ static uint32_t reader_count(void) assert((int32_t)rd >= 0); return rd; } +#endif void bdrv_graph_wrlock(void) { GLOBAL_STATE_CODE(); + /* + * TODO Some callers hold an AioContext lock when this is called, which + * causes deadlocks. Reenable once the AioContext locking is cleaned up (or + * AioContext locks are gone). + */ +#if 0 assert(!qatomic_read(&has_writer)); /* Make sure that constantly arriving new I/O doesn't cause starvation */ @@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void) } while (reader_count() >= 1); bdrv_drain_all_end(); +#endif } void bdrv_graph_wrunlock(void) { GLOBAL_STATE_CODE(); +#if 0 QEMU_LOCK_GUARD(&aio_context_list_lock); assert(qatomic_read(&has_writer)); @@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void) /* Wake up all coroutine that are waiting to read the graph */ qemu_co_enter_all(&reader_queue, &aio_context_list_lock); +#endif } void coroutine_fn bdrv_graph_co_rdlock(void) { + /* TODO Reenable when wrlock is reenabled */ +#if 0 BdrvGraphRWlock *bdrv_graph; bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; @@ -218,10 +233,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void) qemu_co_queue_wait(&reader_queue, &aio_context_list_lock); } } +#endif } void coroutine_fn bdrv_graph_co_rdunlock(void) { +#if 0 BdrvGraphRWlock *bdrv_graph; bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; @@ -239,6 +256,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void) if (qatomic_read(&has_writer)) { aio_wait_kick(); } +#endif } void bdrv_graph_rdlock_main_loop(void) @@ -256,13 +274,19 @@ void bdrv_graph_rdunlock_main_loop(void) void assert_bdrv_graph_readable(void) { /* reader_count() is slow due to aio_context_list_lock lock contention */ + /* TODO Reenable when wrlock is reenabled */ +#if 0 #ifdef CONFIG_DEBUG_GRAPH_LOCK assert(qemu_in_main_thread() || reader_count()); #endif +#endif } void assert_bdrv_graph_writable(void) { assert(qemu_in_main_thread()); + /* TODO Reenable when wrlock is reenabled */ +#if 0 assert(qatomic_read(&has_writer)); +#endif } From 7c1f51bf38de8cea4ed5030467646c37b46edeb7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 17 May 2023 17:28:33 +0200 Subject: [PATCH 20/21] nbd/server: Fix drained_poll to wake coroutine in right AioContext nbd_drained_poll() generally runs in the main thread, not whatever iothread the NBD server coroutine is meant to run in, so it can't directly reenter the coroutines to wake them up. The code seems to have the right intention, it specifies the correct AioContext when it calls qemu_aio_coroutine_enter(). However, this functions doesn't schedule the coroutine to run in that AioContext, but it assumes it is already called in the home thread of the AioContext. To fix this, add a new thread-safe qio_channel_wake_read() that can be called in the main thread to wake up the coroutine in its AioContext, and use this in nbd_drained_poll(). Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf Message-Id: <20230517152834.277483-3-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- include/io/channel.h | 10 ++++++++++ io/channel.c | 33 +++++++++++++++++++++++++++------ nbd/server.c | 3 +-- 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/include/io/channel.h b/include/io/channel.h index 446a566e5e..229bf36910 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -757,6 +757,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc); void coroutine_fn qio_channel_yield(QIOChannel *ioc, GIOCondition condition); +/** + * qio_channel_wake_read: + * @ioc: the channel object + * + * If qio_channel_yield() is currently waiting for the channel to become + * readable, interrupt it and reenter immediately. This function is safe to call + * from any thread. + */ +void qio_channel_wake_read(QIOChannel *ioc); + /** * qio_channel_wait: * @ioc: the channel object diff --git a/io/channel.c b/io/channel.c index 375a130a39..72f0066af5 100644 --- a/io/channel.c +++ b/io/channel.c @@ -19,6 +19,7 @@ */ #include "qemu/osdep.h" +#include "block/aio-wait.h" #include "io/channel.h" #include "qapi/error.h" #include "qemu/main-loop.h" @@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc, static void qio_channel_restart_read(void *opaque) { QIOChannel *ioc = opaque; - Coroutine *co = ioc->read_coroutine; + Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); + + if (!co) { + return; + } /* Assert that aio_co_wake() reenters the coroutine directly */ assert(qemu_get_current_aio_context() == @@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque) static void qio_channel_restart_write(void *opaque) { QIOChannel *ioc = opaque; - Coroutine *co = ioc->write_coroutine; + Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL); + + if (!co) { + return; + } /* Assert that aio_co_wake() reenters the coroutine directly */ assert(qemu_get_current_aio_context() == @@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc) void coroutine_fn qio_channel_yield(QIOChannel *ioc, GIOCondition condition) { + AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context(); + assert(qemu_in_coroutine()); + assert(in_aio_context_home_thread(ioc_ctx)); + if (condition == G_IO_IN) { assert(!ioc->read_coroutine); ioc->read_coroutine = qemu_coroutine_self(); @@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, } qio_channel_set_aio_fd_handlers(ioc); qemu_coroutine_yield(); + assert(in_aio_context_home_thread(ioc_ctx)); /* Allow interrupting the operation by reentering the coroutine other than * through the aio_fd_handlers. */ - if (condition == G_IO_IN && ioc->read_coroutine) { - ioc->read_coroutine = NULL; + if (condition == G_IO_IN) { + assert(ioc->read_coroutine == NULL); qio_channel_set_aio_fd_handlers(ioc); - } else if (condition == G_IO_OUT && ioc->write_coroutine) { - ioc->write_coroutine = NULL; + } else if (condition == G_IO_OUT) { + assert(ioc->write_coroutine == NULL); qio_channel_set_aio_fd_handlers(ioc); } } +void qio_channel_wake_read(QIOChannel *ioc) +{ + Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); + if (co) { + aio_co_wake(co); + } +} static gboolean qio_channel_wait_complete(QIOChannel *ioc, GIOCondition condition, diff --git a/nbd/server.c b/nbd/server.c index e239c2890f..2664d43bff 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque) * enter it here so we don't depend on the client to wake it up. */ if (client->recv_coroutine != NULL && client->read_yielding) { - qemu_aio_coroutine_enter(exp->common.ctx, - client->recv_coroutine); + qio_channel_wake_read(client->ioc); } return true; From 95fdd8db61848d31fde1d9b32da7f3f76babfa25 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 17 May 2023 17:28:34 +0200 Subject: [PATCH 21/21] iotests: Test commit with iothreads and ongoing I/O This tests exercises graph locking, draining, and graph modifications with AioContext switches a lot. Amongst others, it serves as a regression test for bdrv_graph_wrlock() deadlocking because it is called with a locked AioContext and for AioContext handling in the NBD server. Signed-off-by: Kevin Wolf Message-Id: <20230517152834.277483-4-kwolf@redhat.com> Tested-by: Eric Blake Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/iotests.py | 4 ++ .../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++-- .../tests/graph-changes-while-io.out | 4 +- 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 3e82c634cf..7073579a7d 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -462,6 +462,10 @@ class QemuStorageDaemon: assert self._qmp is not None return self._qmp.cmd(cmd, args) + def get_qmp(self) -> QEMUMonitorProtocol: + assert self._qmp is not None + return self._qmp + def stop(self, kill_signal=15): self._p.send_signal(kill_signal) self._p.wait() diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io index 7664f33689..750e7d4d38 100755 --- a/tests/qemu-iotests/tests/graph-changes-while-io +++ b/tests/qemu-iotests/tests/graph-changes-while-io @@ -22,19 +22,19 @@ import os from threading import Thread import iotests -from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ - QemuStorageDaemon +from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ + QMPTestCase, QemuStorageDaemon top = os.path.join(iotests.test_dir, 'top.img') nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') -def do_qemu_img_bench() -> None: +def do_qemu_img_bench(count: int = 2000000) -> None: """ Do some I/O requests on `nbd_sock`. """ - qemu_img('bench', '-f', 'raw', '-c', '2000000', + qemu_img('bench', '-f', 'raw', '-c', str(count), f'nbd+unix:///node0?socket={nbd_sock}') @@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase): bench_thr.join() + def test_commit_while_io(self) -> None: + # Run qemu-img bench in the background + bench_thr = Thread(target=do_qemu_img_bench, args=(200000, )) + bench_thr.start() + + qemu_io('-c', 'write 0 64k', top) + qemu_io('-c', 'write 128k 64k', top) + + result = self.qsd.qmp('blockdev-add', { + 'driver': imgfmt, + 'node-name': 'overlay', + 'backing': None, + 'file': { + 'driver': 'file', + 'filename': top + } + }) + self.assert_qmp(result, 'return', {}) + + result = self.qsd.qmp('blockdev-snapshot', { + 'node': 'node0', + 'overlay': 'overlay', + }) + self.assert_qmp(result, 'return', {}) + + # While qemu-img bench is running, repeatedly commit overlay to node0 + while bench_thr.is_alive(): + result = self.qsd.qmp('block-commit', { + 'job-id': 'job0', + 'device': 'overlay', + }) + self.assert_qmp(result, 'return', {}) + + result = self.qsd.qmp('block-job-cancel', { + 'device': 'job0', + }) + self.assert_qmp(result, 'return', {}) + + cancelled = False + while not cancelled: + for event in self.qsd.get_qmp().get_events(wait=10.0): + if event['event'] != 'JOB_STATUS_CHANGE': + continue + if event['data']['status'] == 'null': + cancelled = True + + bench_thr.join() + if __name__ == '__main__': # Format must support raw backing files iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out index ae1213e6f8..fbc63e62f8 100644 --- a/tests/qemu-iotests/tests/graph-changes-while-io.out +++ b/tests/qemu-iotests/tests/graph-changes-while-io.out @@ -1,5 +1,5 @@ -. +.. ---------------------------------------------------------------------- -Ran 1 tests +Ran 2 tests OK