From b660a84bbb0eb1a76b505648d31d5e82594fb75e Mon Sep 17 00:00:00 2001 From: Stefan Reiter Date: Tue, 7 Apr 2020 13:56:49 +0200 Subject: [PATCH 1/7] job: take each job's lock individually in job_txn_apply All callers of job_txn_apply hold a single job's lock, but different jobs within a transaction can have different contexts, thus we need to lock each one individually before applying the callback function. Similar to job_completed_txn_abort this also requires releasing the caller's context before and reacquiring it after to avoid recursive locks which might break AIO_WAIT_WHILE in the callback. This is safe, since existing code would already have to take this into account, lest job_completed_txn_abort might have broken. This also brings to light a different issue: When a callback function in job_txn_apply moves it's job to a different AIO context, callers will try to release the wrong lock (now that we re-acquire the lock correctly, previously it would just continue with the old lock, leaving the job unlocked for the rest of the return path). Fix this by not caching the job's context. This is only necessary for qmp_block_job_finalize, qmp_job_finalize and job_exit, since everyone else calls through job_exit. One test needed adapting, since it calls job_finalize directly, so it manually needs to acquire the correct context. Signed-off-by: Stefan Reiter Message-Id: <20200407115651.69472-2-s.reiter@proxmox.com> Signed-off-by: Kevin Wolf --- blockdev.c | 9 ++++++++ job-qmp.c | 9 ++++++++ job.c | 50 ++++++++++++++++++++++++++++++++++--------- tests/test-blockjob.c | 2 ++ 4 files changed, 60 insertions(+), 10 deletions(-) diff --git a/blockdev.c b/blockdev.c index fa8630cb41..5faddaa705 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3612,7 +3612,16 @@ void qmp_block_job_finalize(const char *id, Error **errp) } trace_qmp_block_job_finalize(job); + job_ref(&job->job); job_finalize(&job->job, errp); + + /* + * Job's context might have changed via job_finalize (and job_txn_apply + * automatically acquires the new one), so make sure we release the correct + * one. + */ + aio_context = blk_get_aio_context(job->blk); + job_unref(&job->job); aio_context_release(aio_context); } diff --git a/job-qmp.c b/job-qmp.c index fecc939ebd..f9a58832e1 100644 --- a/job-qmp.c +++ b/job-qmp.c @@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp) } trace_qmp_job_finalize(job); + job_ref(job); job_finalize(job, errp); + + /* + * Job's context might have changed via job_finalize (and job_txn_apply + * automatically acquires the new one), so make sure we release the correct + * one. + */ + aio_context = job->aio_context; + job_unref(job); aio_context_release(aio_context); } diff --git a/job.c b/job.c index 134a07b92e..53be57a3a0 100644 --- a/job.c +++ b/job.c @@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job) } } -static int job_txn_apply(JobTxn *txn, int fn(Job *)) +static int job_txn_apply(Job *job, int fn(Job *)) { - Job *job, *next; + AioContext *inner_ctx; + Job *other_job, *next; + JobTxn *txn = job->txn; int rc = 0; - QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) { - rc = fn(job); + /* + * Similar to job_completed_txn_abort, we take each job's lock before + * applying fn, but since we assume that outer_ctx is held by the caller, + * we need to release it here to avoid holding the lock twice - which would + * break AIO_WAIT_WHILE from within fn. 
+ */ + job_ref(job); + aio_context_release(job->aio_context); + + QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { + inner_ctx = other_job->aio_context; + aio_context_acquire(inner_ctx); + rc = fn(other_job); + aio_context_release(inner_ctx); if (rc) { break; } } + + /* + * Note that job->aio_context might have been changed by calling fn, so we + * can't use a local variable to cache it. + */ + aio_context_acquire(job->aio_context); + job_unref(job); return rc; } @@ -774,11 +795,11 @@ static void job_do_finalize(Job *job) assert(job && job->txn); /* prepare the transaction to complete */ - rc = job_txn_apply(job->txn, job_prepare); + rc = job_txn_apply(job, job_prepare); if (rc) { job_completed_txn_abort(job); } else { - job_txn_apply(job->txn, job_finalize_single); + job_txn_apply(job, job_finalize_single); } } @@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job) assert(other_job->ret == 0); } - job_txn_apply(txn, job_transition_to_pending); + job_txn_apply(job, job_transition_to_pending); /* If no jobs need manual finalization, automatically do so */ - if (job_txn_apply(txn, job_needs_finalize) == 0) { + if (job_txn_apply(job, job_needs_finalize) == 0) { job_do_finalize(job); } } @@ -849,9 +870,10 @@ static void job_completed(Job *job) static void job_exit(void *opaque) { Job *job = (Job *)opaque; - AioContext *ctx = job->aio_context; + AioContext *ctx; - aio_context_acquire(ctx); + job_ref(job); + aio_context_acquire(job->aio_context); /* This is a lie, we're not quiescent, but still doing the completion * callbacks. However, completion callbacks tend to involve operations that @@ -862,6 +884,14 @@ static void job_exit(void *opaque) job_completed(job); + /* + * Note that calling job_completed can move the job to a different + * aio_context, so we cannot cache from above. job_txn_apply takes care of + * acquiring the new lock, and we ref/unref to avoid job_completed freeing + * the job underneath us. + */ + ctx = job->aio_context; + job_unref(job); aio_context_release(ctx); } diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 4eeb184caf..7519847912 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -367,7 +367,9 @@ static void test_cancel_concluded(void) aio_poll(qemu_get_aio_context(), true); assert(job->status == JOB_STATUS_PENDING); + aio_context_acquire(job->aio_context); job_finalize(job, &error_abort); + aio_context_release(job->aio_context); assert(job->status == JOB_STATUS_CONCLUDED); cancel_common(s); From 08558e33257ec796594bd411261028a93414a70c Mon Sep 17 00:00:00 2001 From: Stefan Reiter Date: Tue, 7 Apr 2020 13:56:50 +0200 Subject: [PATCH 2/7] replication: assert we own context before job_cancel_sync job_cancel_sync requires the job's lock to be held, all other callers already do this (replication_stop, drive_backup_abort, blockdev_backup_abort, job_cancel_sync_all, cancel_common). In this case we're in a BlockDriver handler, so we already have a lock, just assert that it is the same as the one used for the commit_job. 
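For reference, callers that do not already hold the job's context follow roughly this pattern (a simplified sketch of what e.g. cancel_common in tests/test-blockjob.c does, not a literal excerpt):

    /* sketch only: take the job's own context around the synchronous cancel */
    AioContext *ctx = job->aio_context;
    aio_context_acquire(ctx);
    job_cancel_sync(job);
    aio_context_release(ctx);

Since replication_close runs as a BlockDriver handler with that lock already held, the change below only asserts that the current context is indeed the commit job's context instead of acquiring it a second time.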
Signed-off-by: Stefan Reiter Message-Id: <20200407115651.69472-3-s.reiter@proxmox.com> Signed-off-by: Kevin Wolf --- block/replication.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/replication.c b/block/replication.c index 413d95407d..da013c2041 100644 --- a/block/replication.c +++ b/block/replication.c @@ -144,12 +144,15 @@ fail: static void replication_close(BlockDriverState *bs) { BDRVReplicationState *s = bs->opaque; + Job *commit_job; if (s->stage == BLOCK_REPLICATION_RUNNING) { replication_stop(s->rs, false, NULL); } if (s->stage == BLOCK_REPLICATION_FAILOVER) { - job_cancel_sync(&s->commit_job->job); + commit_job = &s->commit_job->job; + assert(commit_job->aio_context == qemu_get_current_aio_context()); + job_cancel_sync(commit_job); } if (s->mode == REPLICATION_MODE_SECONDARY) { From eca0f3524a4eb57d03a56b0cbcef5527a0981ce4 Mon Sep 17 00:00:00 2001 From: Stefan Reiter Date: Tue, 7 Apr 2020 13:56:51 +0200 Subject: [PATCH 3/7] backup: don't acquire aio_context in backup_clean All code-paths leading to backup_clean (via job_clean) have the job's context already acquired. The job's context is guaranteed to be the same as the one used by backup_top via backup_job_create. Since the previous logic effectively acquired the lock twice, this broke cleanup of backups for disks using IO threads, since the BDRV_POLL_WHILE in bdrv_backup_top_drop -> bdrv_do_drained_begin would only release the lock once, thus deadlocking with the IO thread. This is a partial revert of 0abf2581717a19. Signed-off-by: Stefan Reiter Reviewed-by: Max Reitz Message-Id: <20200407115651.69472-4-s.reiter@proxmox.com> Signed-off-by: Kevin Wolf --- block/backup.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/block/backup.c b/block/backup.c index 7430ca5883..a7a7dcaf4c 100644 --- a/block/backup.c +++ b/block/backup.c @@ -126,11 +126,7 @@ static void backup_abort(Job *job) static void backup_clean(Job *job) { BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); - AioContext *aio_context = bdrv_get_aio_context(s->backup_top); - - aio_context_acquire(aio_context); bdrv_backup_top_drop(s->backup_top); - aio_context_release(aio_context); } void backup_do_checkpoint(BlockJob *job, Error **errp) From 564806c529d4e0acad209b1e5b864a8886092f1f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Apr 2020 14:12:57 +0200 Subject: [PATCH 4/7] block-backend: Reorder flush/pdiscard function definitions Move all variants of the flush/pdiscard functions to a single place and put the blk_co_*() version first because it is called by all other variants (and will become static in the next patch). 
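One practical effect of putting the core version first is that no forward declarations are needed once it becomes static; a generic, self-contained illustration of that ordering (assumed names, not code from this patch):

    /* definition-before-use: the static helper comes first ... */
    static int do_op(int x)
    {
        return x + 1;
    }

    /* ... so the wrappers that follow need no separate prototype */
    int op_wrapper(int x)
    {
        return do_op(x);
    }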
Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Max Reitz Message-Id: <20200407121259.21350-2-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/block-backend.c | 92 +++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 8b8f2a80a0..17b2e87afa 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1488,38 +1488,6 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, blk_aio_write_entry, flags, cb, opaque); } -static void blk_aio_flush_entry(void *opaque) -{ - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - - rwco->ret = blk_co_flush(rwco->blk); - blk_aio_complete(acb); -} - -BlockAIOCB *blk_aio_flush(BlockBackend *blk, - BlockCompletionFunc *cb, void *opaque) -{ - return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); -} - -static void blk_aio_pdiscard_entry(void *opaque) -{ - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); - blk_aio_complete(acb); -} - -BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, - int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque) -{ - return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, - cb, opaque); -} - void blk_aio_cancel(BlockAIOCB *acb) { bdrv_aio_cancel(acb); @@ -1586,6 +1554,37 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) return bdrv_co_pdiscard(blk->root, offset, bytes); } +static void blk_aio_pdiscard_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, + int64_t offset, int bytes, + BlockCompletionFunc *cb, void *opaque) +{ + return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, + cb, opaque); +} + +static void blk_pdiscard_entry(void *opaque) +{ + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); + aio_wait_kick(); +} + +int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +{ + return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); +} + int blk_co_flush(BlockBackend *blk) { blk_wait_while_drained(blk); @@ -1597,6 +1596,21 @@ int blk_co_flush(BlockBackend *blk) return bdrv_co_flush(blk_bs(blk)); } +static void blk_aio_flush_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_co_flush(rwco->blk); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_flush(BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque) +{ + return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); +} + static void blk_flush_entry(void *opaque) { BlkRwCo *rwco = opaque; @@ -2083,20 +2097,6 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, return bdrv_truncate(blk->root, offset, exact, prealloc, errp); } -static void blk_pdiscard_entry(void *opaque) -{ - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); - aio_wait_kick(); -} - -int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -{ - return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); -} - int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size) { From fbb92b6798894d3bf62fe3578d99fa62c720b242 Mon 
Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Apr 2020 14:12:58 +0200 Subject: [PATCH 5/7] block: Increase BB.in_flight for coroutine and sync interfaces External callers of blk_co_*() and of the synchronous blk_*() functions don't currently increase the BlockBackend.in_flight counter, but calls from blk_aio_*() do, so there is an inconsistency whether the counter has been increased or not. This patch moves the actual operations to static functions that can later know they will always be called with in_flight increased exactly once, even for external callers using the blk_co_*() coroutine interfaces. If the public blk_co_*() interface is unused, remove it. Signed-off-by: Kevin Wolf Message-Id: <20200407121259.21350-3-kwolf@redhat.com> Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block/block-backend.c | 103 +++++++++++++++++++++++++-------- include/sysemu/block-backend.h | 1 - 2 files changed, 80 insertions(+), 24 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 17b2e87afa..610dbfa0b2 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1147,9 +1147,10 @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) } } -int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ +static int coroutine_fn +blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; BlockDriverState *bs; @@ -1178,10 +1179,24 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, return ret; } -int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - unsigned int bytes, - QEMUIOVector *qiov, size_t qiov_offset, - BdrvRequestFlags flags) +int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_preadv(blk, offset, bytes, qiov, flags); + blk_dec_in_flight(blk); + + return ret; +} + +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ +static int coroutine_fn +blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) { int ret; BlockDriverState *bs; @@ -1214,6 +1229,20 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, return ret; } +int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); + blk_dec_in_flight(blk); + + return ret; +} + int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) @@ -1234,7 +1263,7 @@ static void blk_read_entry(void *opaque) BlkRwCo *rwco = opaque; QEMUIOVector *qiov = rwco->iobuf; - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, + rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, qiov, rwco->flags); aio_wait_kick(); } @@ -1244,8 +1273,8 @@ static void blk_write_entry(void *opaque) BlkRwCo *rwco = opaque; QEMUIOVector *qiov = rwco->iobuf; - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, - qiov, rwco->flags); + rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, + qiov, 0, 
rwco->flags); aio_wait_kick(); } @@ -1262,6 +1291,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, .ret = NOT_DONE, }; + blk_inc_in_flight(blk); if (qemu_in_coroutine()) { /* Fast-path if already in coroutine context */ co_entry(&rwco); @@ -1270,6 +1300,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, bdrv_coroutine_enter(blk_bs(blk), co); BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); } + blk_dec_in_flight(blk); return rwco.ret; } @@ -1394,7 +1425,7 @@ static void blk_aio_read_entry(void *opaque) } assert(qiov->size == acb->bytes); - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, + rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, qiov, rwco->flags); blk_aio_complete(acb); } @@ -1412,8 +1443,8 @@ static void blk_aio_write_entry(void *opaque) } assert(!qiov || qiov->size == acb->bytes); - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, - qiov, rwco->flags); + rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, + qiov, 0, rwco->flags); blk_aio_complete(acb); } @@ -1498,7 +1529,9 @@ void blk_aio_cancel_async(BlockAIOCB *acb) bdrv_aio_cancel_async(acb); } -int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ +static int coroutine_fn +blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { blk_wait_while_drained(blk); @@ -1514,8 +1547,7 @@ static void blk_ioctl_entry(void *opaque) BlkRwCo *rwco = opaque; QEMUIOVector *qiov = rwco->iobuf; - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, - qiov->iov[0].iov_base); + rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); aio_wait_kick(); } @@ -1529,7 +1561,7 @@ static void blk_aio_ioctl_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); + rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); blk_aio_complete(acb); } @@ -1540,7 +1572,9 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); } -int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ +static int coroutine_fn +blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) { int ret; @@ -1559,7 +1593,7 @@ static void blk_aio_pdiscard_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); + rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); blk_aio_complete(acb); } @@ -1571,12 +1605,23 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, cb, opaque); } +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +{ + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_pdiscard(blk, offset, bytes); + blk_dec_in_flight(blk); + + return ret; +} + static void blk_pdiscard_entry(void *opaque) { BlkRwCo *rwco = opaque; QEMUIOVector *qiov = rwco->iobuf; - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); + rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); aio_wait_kick(); } @@ -1585,7 +1630,8 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); } -int blk_co_flush(BlockBackend *blk) +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ 
+static int coroutine_fn blk_do_flush(BlockBackend *blk) { blk_wait_while_drained(blk); @@ -1601,7 +1647,7 @@ static void blk_aio_flush_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_co_flush(rwco->blk); + rwco->ret = blk_do_flush(rwco->blk); blk_aio_complete(acb); } @@ -1611,10 +1657,21 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); } +int coroutine_fn blk_co_flush(BlockBackend *blk) +{ + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_flush(blk); + blk_dec_in_flight(blk); + + return ret; +} + static void blk_flush_entry(void *opaque) { BlkRwCo *rwco = opaque; - rwco->ret = blk_co_flush(rwco->blk); + rwco->ret = blk_do_flush(rwco->blk); aio_wait_kick(); } diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index b198deca0b..9bbdbd63d7 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -171,7 +171,6 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, BlockCompletionFunc *cb, void *opaque); void blk_aio_cancel(BlockAIOCB *acb); void blk_aio_cancel_async(BlockAIOCB *acb); -int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque); From 7f16476fab14fc32388e0ebae793f64673848efa Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Apr 2020 14:12:59 +0200 Subject: [PATCH 6/7] block: Fix blk->in_flight during blk_wait_while_drained() Waiting in blk_wait_while_drained() while blk->in_flight is increased for the current request is wrong because it will cause the drain operation to deadlock. This patch makes sure that blk_wait_while_drained() is called with blk->in_flight increased exactly once for the current request, and that it temporarily decreases the counter while it waits. 
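The deadlock is straightforward: the drain section polls until blk->in_flight drops to zero, but a request parked in blk_wait_while_drained() with the counter still elevated keeps it nonzero forever. The corrected wait therefore follows this counter discipline (a condensed sketch of the hunk below):

    assert(blk->in_flight > 0);    /* the caller owns exactly one reference */
    blk_dec_in_flight(blk);        /* release it so the drain can complete */
    qemu_co_queue_wait(&blk->queued_requests, NULL);
    blk_inc_in_flight(blk);        /* re-take it before resuming the request */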
Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Max Reitz Message-Id: <20200407121259.21350-4-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/block-backend.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 610dbfa0b2..38ae413826 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return 0; } +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) { + assert(blk->in_flight > 0); + if (blk->quiesce_counter && !blk->disable_request_queuing) { + blk_dec_in_flight(blk); qemu_co_queue_wait(&blk->queued_requests, NULL); + blk_inc_in_flight(blk); } } @@ -1418,12 +1423,6 @@ static void blk_aio_read_entry(void *opaque) BlkRwCo *rwco = &acb->rwco; QEMUIOVector *qiov = rwco->iobuf; - if (rwco->blk->quiesce_counter) { - blk_dec_in_flight(rwco->blk); - blk_wait_while_drained(rwco->blk); - blk_inc_in_flight(rwco->blk); - } - assert(qiov->size == acb->bytes); rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, qiov, rwco->flags); @@ -1436,12 +1435,6 @@ static void blk_aio_write_entry(void *opaque) BlkRwCo *rwco = &acb->rwco; QEMUIOVector *qiov = rwco->iobuf; - if (rwco->blk->quiesce_counter) { - blk_dec_in_flight(rwco->blk); - blk_wait_while_drained(rwco->blk); - blk_inc_in_flight(rwco->blk); - } - assert(!qiov || qiov->size == acb->bytes); rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, qiov, 0, rwco->flags); From 3f6de653b946fe849330208becf79d6af7e876cb Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 2 Apr 2020 11:36:03 +0200 Subject: [PATCH 7/7] vpc: Don't round up already aligned BAT sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As reported on Launchpad, Azure apparently doesn't accept images for upload that are not both aligned to 1 MB blocks and have a BAT size that matches the image size exactly. As far as I can tell, there is no real reason why we create a BAT that is one entry longer than necessary for aligned image sizes, so change that. (Even though the condition is only mentioned as "should" in the spec and previous products accepted larger BATs - but we'll try to maintain compatibility with as many of Microsoft's ever-changing interpretations of the VHD spec as possible.) Fixes: https://bugs.launchpad.net/bugs/1870098 Reported-by: Tobias Witek Signed-off-by: Kevin Wolf Message-Id: <20200402093603.2369-1-kwolf@redhat.com> Reviewed-by: Max Reitz Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Kevin Wolf --- block/vpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/vpc.c b/block/vpc.c index 6df75e22dc..d8141b52da 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -835,7 +835,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf, /* Write the footer (twice: at the beginning and at the end) */ block_size = 0x200000; - num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512); + num_bat_entries = DIV_ROUND_UP(total_sectors, block_size / 512); ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0); if (ret < 0) {
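As a quick arithmetic check of the change above, with the 2 MB block_size set a few lines earlier (block_size / 512 = 4096 sectors per block):

    total_sectors = 4096 (image exactly one block long):
        old: (4096 + 4096) / 4096      = 2 BAT entries (one more than needed)
        new: DIV_ROUND_UP(4096, 4096)  = 1 BAT entry
    total_sectors = 6144 (one and a half blocks):
        old: (6144 + 4096) / 4096      = 2 BAT entries
        new: DIV_ROUND_UP(6144, 4096)  = 2 BAT entries

So the result only changes for image sizes that are already block-aligned, which is exactly the case where the old formula produced the oversized BAT that Azure rejects.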