From 6573d9c63885aaf533366ab5c68318d1cf1a0fcc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:00 +0200 Subject: [PATCH 01/12] blockjob: remove unnecessary check !job is always checked prior to the call, drop it from here. Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini Reviewed-by: Jeff Cody Message-id: 20170508141310.8674-2-pbonzini@redhat.com Signed-off-by: Jeff Cody --- blockjob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockjob.c b/blockjob.c index 6e489327ff..23022b3331 100644 --- a/blockjob.c +++ b/blockjob.c @@ -480,7 +480,7 @@ static bool block_job_should_pause(BlockJob *job) bool block_job_user_paused(BlockJob *job) { - return job ? job->user_paused : 0; + return job->user_paused; } void coroutine_fn block_job_pause_point(BlockJob *job) From 9f086abbe43d29de2409e01d8fb2d68a6102afbc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:01 +0200 Subject: [PATCH 02/12] blockjob: remove iostatus_reset callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is unused since commit 66a0fae ("blockjob: Don't touch BDS iostatus", 2016-05-19). Reviewed-by: Stefan Hajnoczi Reviewed-by: John Snow Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini Reviewed-by: Jeff Cody Message-id: 20170508141310.8674-3-pbonzini@redhat.com Signed-off-by: Jeff Cody --- blockjob.c | 3 --- include/block/blockjob_int.h | 3 --- 2 files changed, 6 deletions(-) diff --git a/blockjob.c b/blockjob.c index 23022b3331..71187d0c9e 100644 --- a/blockjob.c +++ b/blockjob.c @@ -555,9 +555,6 @@ bool block_job_is_cancelled(BlockJob *job) void block_job_iostatus_reset(BlockJob *job) { job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; - if (job->driver->iostatus_reset) { - job->driver->iostatus_reset(job); - } } static int block_job_finish_sync(BlockJob *job, diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 3f86cc5acc..bfcc5d1241 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -44,9 +44,6 @@ struct BlockJobDriver { /** Optional callback for job types that support setting a speed limit */ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); - /** Optional callback for job types that need to forward I/O status reset */ - void (*iostatus_reset)(BlockJob *job); - /** Mandatory: Entrypoint for the Coroutine. */ CoroutineEntry *start; From 05b0d8e3b8a1ed1a5840a317ce4f7f4ebe53156f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:02 +0200 Subject: [PATCH 03/12] blockjob: introduce block_job_early_fail Outside blockjob.c, block_job_unref is only used when a block job fails to start, and block_job_ref is not used at all. The reference counting thus is pretty well hidden. Introduce a separate function to be used by block jobs; because block_job_ref and block_job_unref now become static, move them earlier in blockjob.c. Reviewed-by: Stefan Hajnoczi Reviewed-by: John Snow Signed-off-by: Paolo Bonzini Reviewed-by: Jeff Cody Message-id: 20170508141310.8674-4-pbonzini@redhat.com Signed-off-by: Jeff Cody --- block/backup.c | 2 +- block/commit.c | 2 +- block/mirror.c | 2 +- blockjob.c | 47 +++++++++++++++++++++--------------- include/block/blockjob_int.h | 15 +++--------- tests/test-blockjob.c | 10 ++++---- 6 files changed, 39 insertions(+), 39 deletions(-) diff --git a/block/backup.c b/block/backup.c index a4fb2884f9..5387fbd84e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -692,7 +692,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, } if (job) { backup_clean(&job->common); - block_job_unref(&job->common); + block_job_early_fail(&job->common); } return NULL; diff --git a/block/commit.c b/block/commit.c index 76a0d98c6f..a3028b20f3 100644 --- a/block/commit.c +++ b/block/commit.c @@ -426,7 +426,7 @@ fail: if (commit_top_bs) { bdrv_set_backing_hd(overlay_bs, top, &error_abort); } - block_job_unref(&s->common); + block_job_early_fail(&s->common); } diff --git a/block/mirror.c b/block/mirror.c index e86f8f8ad7..b9eb2a2ddb 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1252,7 +1252,7 @@ fail: g_free(s->replaces); blk_unref(s->target); - block_job_unref(&s->common); + block_job_early_fail(&s->common); } bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, diff --git a/blockjob.c b/blockjob.c index 71187d0c9e..5a722c3635 100644 --- a/blockjob.c +++ b/blockjob.c @@ -106,6 +106,32 @@ BlockJob *block_job_get(const char *id) return NULL; } +static void block_job_ref(BlockJob *job) +{ + ++job->refcnt; +} + +static void block_job_attached_aio_context(AioContext *new_context, + void *opaque); +static void block_job_detach_aio_context(void *opaque); + +static void block_job_unref(BlockJob *job) +{ + if (--job->refcnt == 0) { + BlockDriverState *bs = blk_bs(job->blk); + bs->job = NULL; + block_job_remove_all_bdrv(job); + blk_remove_aio_context_notifier(job->blk, + block_job_attached_aio_context, + block_job_detach_aio_context, job); + blk_unref(job->blk); + error_free(job->blocker); + g_free(job->id); + QLIST_REMOVE(job, job_list); + g_free(job); + } +} + static void block_job_attached_aio_context(AioContext *new_context, void *opaque) { @@ -293,26 +319,9 @@ void block_job_start(BlockJob *job) bdrv_coroutine_enter(blk_bs(job->blk), job->co); } -void block_job_ref(BlockJob *job) +void block_job_early_fail(BlockJob *job) { - ++job->refcnt; -} - -void block_job_unref(BlockJob *job) -{ - if (--job->refcnt == 0) { - BlockDriverState *bs = blk_bs(job->blk); - bs->job = NULL; - block_job_remove_all_bdrv(job); - blk_remove_aio_context_notifier(job->blk, - block_job_attached_aio_context, - block_job_detach_aio_context, job); - blk_unref(job->blk); - error_free(job->blocker); - g_free(job->id); - QLIST_REMOVE(job, job_list); - g_free(job); - } + block_job_unref(job); } static void block_job_completed_single(BlockJob *job) diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index bfcc5d1241..45cdfd4ac1 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -156,21 +156,12 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); void block_job_yield(BlockJob *job); /** - * block_job_ref: + * block_job_early_fail: * @bs: The block device. * - * Grab a reference to the block job. Should be paired with block_job_unref. + * The block job could not be started, free it. */ -void block_job_ref(BlockJob *job); - -/** - * block_job_unref: - * @bs: The block device. - * - * Release reference to the block job and release resources if it is the last - * reference. - */ -void block_job_unref(BlockJob *job); +void block_job_early_fail(BlockJob *job); /** * block_job_completed: diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 740e740398..23bdf1a932 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -116,11 +116,11 @@ static void test_job_ids(void) job[1] = do_test_id(blk[1], "id0", false); /* But once job[0] finishes we can reuse its ID */ - block_job_unref(job[0]); + block_job_early_fail(job[0]); job[1] = do_test_id(blk[1], "id0", true); /* No job ID specified, defaults to the backend name ('drive1') */ - block_job_unref(job[1]); + block_job_early_fail(job[1]); job[1] = do_test_id(blk[1], NULL, true); /* Duplicate job ID */ @@ -133,9 +133,9 @@ static void test_job_ids(void) /* This one is valid */ job[2] = do_test_id(blk[2], "id_2", true); - block_job_unref(job[0]); - block_job_unref(job[1]); - block_job_unref(job[2]); + block_job_early_fail(job[0]); + block_job_early_fail(job[1]); + block_job_early_fail(job[2]); destroy_blk(blk[0]); destroy_blk(blk[1]); From f321dcb57f50be177250c7c5ba4b6334e9cb6ca5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:03 +0200 Subject: [PATCH 04/12] blockjob: introduce block_job_pause/resume_all Remove use of block_job_pause/resume from outside blockjob.c, thus making them static. The new functions are used by the block layer, so place them in blockjob_int.h. Reviewed-by: Stefan Hajnoczi Reviewed-by: John Snow Signed-off-by: Paolo Bonzini Reviewed-by: Jeff Cody Message-id: 20170508141310.8674-5-pbonzini@redhat.com Signed-off-by: Jeff Cody --- block/io.c | 19 +----- blockjob.c | 114 +++++++++++++++++++++-------------- include/block/blockjob.h | 16 ----- include/block/blockjob_int.h | 14 +++++ 4 files changed, 86 insertions(+), 77 deletions(-) diff --git a/block/io.c b/block/io.c index fdd7485c22..ed31810c0a 100644 --- a/block/io.c +++ b/block/io.c @@ -26,6 +26,7 @@ #include "trace.h" #include "sysemu/block-backend.h" #include "block/blockjob.h" +#include "block/blockjob_int.h" #include "block/block_int.h" #include "qemu/cutils.h" #include "qapi/error.h" @@ -301,16 +302,9 @@ void bdrv_drain_all_begin(void) bool waited = true; BlockDriverState *bs; BdrvNextIterator it; - BlockJob *job = NULL; GSList *aio_ctxs = NULL, *ctx; - while ((job = block_job_next(job))) { - AioContext *aio_context = blk_get_aio_context(job->blk); - - aio_context_acquire(aio_context); - block_job_pause(job); - aio_context_release(aio_context); - } + block_job_pause_all(); for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); @@ -354,7 +348,6 @@ void bdrv_drain_all_end(void) { BlockDriverState *bs; BdrvNextIterator it; - BlockJob *job = NULL; for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); @@ -365,13 +358,7 @@ void bdrv_drain_all_end(void) aio_context_release(aio_context); } - while ((job = block_job_next(job))) { - AioContext *aio_context = blk_get_aio_context(job->blk); - - aio_context_acquire(aio_context); - block_job_resume(job); - aio_context_release(aio_context); - } + block_job_resume_all(); } void bdrv_drain_all(void) diff --git a/blockjob.c b/blockjob.c index 5a722c3635..85ad610cae 100644 --- a/blockjob.c +++ b/blockjob.c @@ -55,36 +55,6 @@ struct BlockJobTxn { static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); -static char *child_job_get_parent_desc(BdrvChild *c) -{ - BlockJob *job = c->opaque; - return g_strdup_printf("%s job '%s'", - BlockJobType_lookup[job->driver->job_type], - job->id); -} - -static const BdrvChildRole child_job = { - .get_parent_desc = child_job_get_parent_desc, - .stay_at_node = true, -}; - -static void block_job_drained_begin(void *opaque) -{ - BlockJob *job = opaque; - block_job_pause(job); -} - -static void block_job_drained_end(void *opaque) -{ - BlockJob *job = opaque; - block_job_resume(job); -} - -static const BlockDevOps block_job_dev_ops = { - .drained_begin = block_job_drained_begin, - .drained_end = block_job_drained_end, -}; - BlockJob *block_job_next(BlockJob *job) { if (!job) { @@ -106,6 +76,21 @@ BlockJob *block_job_get(const char *id) return NULL; } +static void block_job_pause(BlockJob *job) +{ + job->pause_count++; +} + +static void block_job_resume(BlockJob *job) +{ + assert(job->pause_count > 0); + job->pause_count--; + if (job->pause_count) { + return; + } + block_job_enter(job); +} + static void block_job_ref(BlockJob *job) { ++job->refcnt; @@ -171,6 +156,36 @@ static void block_job_detach_aio_context(void *opaque) block_job_unref(job); } +static char *child_job_get_parent_desc(BdrvChild *c) +{ + BlockJob *job = c->opaque; + return g_strdup_printf("%s job '%s'", + BlockJobType_lookup[job->driver->job_type], + job->id); +} + +static const BdrvChildRole child_job = { + .get_parent_desc = child_job_get_parent_desc, + .stay_at_node = true, +}; + +static void block_job_drained_begin(void *opaque) +{ + BlockJob *job = opaque; + block_job_pause(job); +} + +static void block_job_drained_end(void *opaque) +{ + BlockJob *job = opaque; + block_job_resume(job); +} + +static const BlockDevOps block_job_dev_ops = { + .drained_begin = block_job_drained_begin, + .drained_end = block_job_drained_end, +}; + void block_job_remove_all_bdrv(BlockJob *job) { GSList *l; @@ -471,11 +486,6 @@ void block_job_complete(BlockJob *job, Error **errp) job->driver->complete(job, errp); } -void block_job_pause(BlockJob *job) -{ - job->pause_count++; -} - void block_job_user_pause(BlockJob *job) { job->user_paused = true; @@ -520,16 +530,6 @@ void coroutine_fn block_job_pause_point(BlockJob *job) } } -void block_job_resume(BlockJob *job) -{ - assert(job->pause_count > 0); - job->pause_count--; - if (job->pause_count) { - return; - } - block_job_enter(job); -} - void block_job_user_resume(BlockJob *job) { if (job && job->user_paused && job->pause_count > 0) { @@ -723,6 +723,30 @@ static void block_job_event_completed(BlockJob *job, const char *msg) &error_abort); } +void block_job_pause_all(void) +{ + BlockJob *job = NULL; + while ((job = block_job_next(job))) { + AioContext *aio_context = blk_get_aio_context(job->blk); + + aio_context_acquire(aio_context); + block_job_pause(job); + aio_context_release(aio_context); + } +} + +void block_job_resume_all(void) +{ + BlockJob *job = NULL; + while ((job = block_job_next(job))) { + AioContext *aio_context = blk_get_aio_context(job->blk); + + aio_context_acquire(aio_context); + block_job_resume(job); + aio_context_release(aio_context); + } +} + void block_job_event_ready(BlockJob *job) { job->ready = true; diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 9e906f7d7e..09c7c694b5 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -234,14 +234,6 @@ void block_job_complete(BlockJob *job, Error **errp); */ BlockJobInfo *block_job_query(BlockJob *job, Error **errp); -/** - * block_job_pause: - * @job: The job to be paused. - * - * Asynchronously pause the specified job. - */ -void block_job_pause(BlockJob *job); - /** * block_job_user_pause: * @job: The job to be paused. @@ -259,14 +251,6 @@ void block_job_user_pause(BlockJob *job); */ bool block_job_user_paused(BlockJob *job); -/** - * block_job_resume: - * @job: The job to be resumed. - * - * Resume the specified job. Must be paired with a preceding block_job_pause. - */ -void block_job_resume(BlockJob *job); - /** * block_job_user_resume: * @job: The job to be resumed. diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 45cdfd4ac1..4f2d2ac75a 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -155,6 +155,20 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); */ void block_job_yield(BlockJob *job); +/** + * block_job_pause_all: + * + * Asynchronously pause all jobs. + */ +void block_job_pause_all(void); + +/** + * block_job_resume_all: + * + * Resume all block jobs. Must be paired with a preceding block_job_pause_all. + */ +void block_job_resume_all(void); + /** * block_job_early_fail: * @bs: The block device. From 88691b37f86250647816d77e8e1f0933557e7df5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:04 +0200 Subject: [PATCH 05/12] blockjob: separate monitor and blockjob APIs We have two different headers for block job operations, blockjob.h and blockjob_int.h. The former contains APIs called by the monitor, the latter contains APIs called by the block job drivers and the block layer itself. Keep the two APIs separate in the blockjob.c file too. This will be useful when transitioning away from the AioContext lock, because there will be locking policies for the two categories, too---the monitor will have to call new block_job_lock/unlock APIs, while blockjob APIs will take care of this for the users. Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini Message-id: 20170508141310.8674-6-pbonzini@redhat.com Signed-off-by: Jeff Cody --- blockjob.c | 390 ++++++++++++++++++++++++++++------------------------- 1 file changed, 205 insertions(+), 185 deletions(-) diff --git a/blockjob.c b/blockjob.c index 85ad610cae..a8ecb41aef 100644 --- a/blockjob.c +++ b/blockjob.c @@ -55,6 +55,21 @@ struct BlockJobTxn { static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); +/* + * The block job API is composed of two categories of functions. + * + * The first includes functions used by the monitor. The monitor is + * peculiar in that it accesses the block job list with block_job_get, and + * therefore needs consistency across block_job_get and the actual operation + * (e.g. block_job_set_speed). The consistency is achieved with + * aio_context_acquire/release. These functions are declared in blockjob.h. + * + * The second includes functions used by the block job drivers and sometimes + * by the core block layer. These do not care about locking, because the + * whole coroutine runs under the AioContext lock, and are declared in + * blockjob_int.h. + */ + BlockJob *block_job_next(BlockJob *job) { if (!job) { @@ -216,90 +231,6 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, return 0; } -void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, uint64_t perm, - uint64_t shared_perm, int64_t speed, int flags, - BlockCompletionFunc *cb, void *opaque, Error **errp) -{ - BlockBackend *blk; - BlockJob *job; - int ret; - - if (bs->job) { - error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); - return NULL; - } - - if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) { - job_id = bdrv_get_device_name(bs); - if (!*job_id) { - error_setg(errp, "An explicit job ID is required for this node"); - return NULL; - } - } - - if (job_id) { - if (flags & BLOCK_JOB_INTERNAL) { - error_setg(errp, "Cannot specify job ID for internal block job"); - return NULL; - } - - if (!id_wellformed(job_id)) { - error_setg(errp, "Invalid job ID '%s'", job_id); - return NULL; - } - - if (block_job_get(job_id)) { - error_setg(errp, "Job ID '%s' already in use", job_id); - return NULL; - } - } - - blk = blk_new(perm, shared_perm); - ret = blk_insert_bs(blk, bs, errp); - if (ret < 0) { - blk_unref(blk); - return NULL; - } - - job = g_malloc0(driver->instance_size); - job->driver = driver; - job->id = g_strdup(job_id); - job->blk = blk; - job->cb = cb; - job->opaque = opaque; - job->busy = false; - job->paused = true; - job->pause_count = 1; - job->refcnt = 1; - - error_setg(&job->blocker, "block device is in use by block job: %s", - BlockJobType_lookup[driver->job_type]); - block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); - bs->job = job; - - blk_set_dev_ops(blk, &block_job_dev_ops, job); - bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); - - QLIST_INSERT_HEAD(&block_jobs, job, job_list); - - blk_add_aio_context_notifier(blk, block_job_attached_aio_context, - block_job_detach_aio_context, job); - - /* Only set speed when necessary to avoid NotSupported error */ - if (speed != 0) { - Error *local_err = NULL; - - block_job_set_speed(job, speed, &local_err); - if (local_err) { - block_job_unref(job); - error_propagate(errp, local_err); - return NULL; - } - } - return job; -} - bool block_job_is_internal(BlockJob *job) { return (job->id == NULL); @@ -334,11 +265,6 @@ void block_job_start(BlockJob *job) bdrv_coroutine_enter(blk_bs(job->blk), job->co); } -void block_job_early_fail(BlockJob *job) -{ - block_job_unref(job); -} - static void block_job_completed_single(BlockJob *job) { if (!job->ret) { @@ -440,21 +366,6 @@ static void block_job_completed_txn_success(BlockJob *job) } } -void block_job_completed(BlockJob *job, int ret) -{ - assert(blk_bs(job->blk)->job == job); - assert(!job->completed); - job->completed = true; - job->ret = ret; - if (!job->txn) { - block_job_completed_single(job); - } else if (ret < 0 || block_job_is_cancelled(job)) { - block_job_completed_txn_abort(job); - } else { - block_job_completed_txn_success(job); - } -} - void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) { Error *local_err = NULL; @@ -492,44 +403,11 @@ void block_job_user_pause(BlockJob *job) block_job_pause(job); } -static bool block_job_should_pause(BlockJob *job) -{ - return job->pause_count > 0; -} - bool block_job_user_paused(BlockJob *job) { return job->user_paused; } -void coroutine_fn block_job_pause_point(BlockJob *job) -{ - assert(job && block_job_started(job)); - - if (!block_job_should_pause(job)) { - return; - } - if (block_job_is_cancelled(job)) { - return; - } - - if (job->driver->pause) { - job->driver->pause(job); - } - - if (block_job_should_pause(job) && !block_job_is_cancelled(job)) { - job->paused = true; - job->busy = false; - qemu_coroutine_yield(); /* wait for block_job_resume() */ - job->busy = true; - job->paused = false; - } - - if (job->driver->resume) { - job->driver->resume(job); - } -} - void block_job_user_resume(BlockJob *job) { if (job && job->user_paused && job->pause_count > 0) { @@ -538,13 +416,6 @@ void block_job_user_resume(BlockJob *job) } } -void block_job_enter(BlockJob *job) -{ - if (job->co && !job->busy) { - bdrv_coroutine_enter(blk_bs(job->blk), job->co); - } -} - void block_job_cancel(BlockJob *job) { if (block_job_started(job)) { @@ -556,11 +427,6 @@ void block_job_cancel(BlockJob *job) } } -bool block_job_is_cancelled(BlockJob *job) -{ - return job->cancelled; -} - void block_job_iostatus_reset(BlockJob *job) { job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; @@ -628,42 +494,6 @@ int block_job_complete_sync(BlockJob *job, Error **errp) return block_job_finish_sync(job, &block_job_complete, errp); } -void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) -{ - assert(job->busy); - - /* Check cancellation *before* setting busy = false, too! */ - if (block_job_is_cancelled(job)) { - return; - } - - job->busy = false; - if (!block_job_should_pause(job)) { - co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); - } - job->busy = true; - - block_job_pause_point(job); -} - -void block_job_yield(BlockJob *job) -{ - assert(job->busy); - - /* Check cancellation *before* setting busy = false, too! */ - if (block_job_is_cancelled(job)) { - return; - } - - job->busy = false; - if (!block_job_should_pause(job)) { - qemu_coroutine_yield(); - } - job->busy = true; - - block_job_pause_point(job); -} - BlockJobInfo *block_job_query(BlockJob *job, Error **errp) { BlockJobInfo *info; @@ -723,6 +553,95 @@ static void block_job_event_completed(BlockJob *job, const char *msg) &error_abort); } +/* + * API for block job drivers and the block layer. These functions are + * declared in blockjob_int.h. + */ + +void *block_job_create(const char *job_id, const BlockJobDriver *driver, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, + BlockCompletionFunc *cb, void *opaque, Error **errp) +{ + BlockBackend *blk; + BlockJob *job; + int ret; + + if (bs->job) { + error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); + return NULL; + } + + if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) { + job_id = bdrv_get_device_name(bs); + if (!*job_id) { + error_setg(errp, "An explicit job ID is required for this node"); + return NULL; + } + } + + if (job_id) { + if (flags & BLOCK_JOB_INTERNAL) { + error_setg(errp, "Cannot specify job ID for internal block job"); + return NULL; + } + + if (!id_wellformed(job_id)) { + error_setg(errp, "Invalid job ID '%s'", job_id); + return NULL; + } + + if (block_job_get(job_id)) { + error_setg(errp, "Job ID '%s' already in use", job_id); + return NULL; + } + } + + blk = blk_new(perm, shared_perm); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + blk_unref(blk); + return NULL; + } + + job = g_malloc0(driver->instance_size); + job->driver = driver; + job->id = g_strdup(job_id); + job->blk = blk; + job->cb = cb; + job->opaque = opaque; + job->busy = false; + job->paused = true; + job->pause_count = 1; + job->refcnt = 1; + + error_setg(&job->blocker, "block device is in use by block job: %s", + BlockJobType_lookup[driver->job_type]); + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); + bs->job = job; + + blk_set_dev_ops(blk, &block_job_dev_ops, job); + bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); + + QLIST_INSERT_HEAD(&block_jobs, job, job_list); + + blk_add_aio_context_notifier(blk, block_job_attached_aio_context, + block_job_detach_aio_context, job); + + /* Only set speed when necessary to avoid NotSupported error */ + if (speed != 0) { + Error *local_err = NULL; + + block_job_set_speed(job, speed, &local_err); + if (local_err) { + block_job_unref(job); + error_propagate(errp, local_err); + return NULL; + } + } + return job; +} + void block_job_pause_all(void) { BlockJob *job = NULL; @@ -735,6 +654,59 @@ void block_job_pause_all(void) } } +void block_job_early_fail(BlockJob *job) +{ + block_job_unref(job); +} + +void block_job_completed(BlockJob *job, int ret) +{ + assert(blk_bs(job->blk)->job == job); + assert(!job->completed); + job->completed = true; + job->ret = ret; + if (!job->txn) { + block_job_completed_single(job); + } else if (ret < 0 || block_job_is_cancelled(job)) { + block_job_completed_txn_abort(job); + } else { + block_job_completed_txn_success(job); + } +} + +static bool block_job_should_pause(BlockJob *job) +{ + return job->pause_count > 0; +} + +void coroutine_fn block_job_pause_point(BlockJob *job) +{ + assert(job && block_job_started(job)); + + if (!block_job_should_pause(job)) { + return; + } + if (block_job_is_cancelled(job)) { + return; + } + + if (job->driver->pause) { + job->driver->pause(job); + } + + if (block_job_should_pause(job) && !block_job_is_cancelled(job)) { + job->paused = true; + job->busy = false; + qemu_coroutine_yield(); /* wait for block_job_resume() */ + job->busy = true; + job->paused = false; + } + + if (job->driver->resume) { + job->driver->resume(job); + } +} + void block_job_resume_all(void) { BlockJob *job = NULL; @@ -747,6 +719,54 @@ void block_job_resume_all(void) } } +void block_job_enter(BlockJob *job) +{ + if (job->co && !job->busy) { + bdrv_coroutine_enter(blk_bs(job->blk), job->co); + } +} + +bool block_job_is_cancelled(BlockJob *job) +{ + return job->cancelled; +} + +void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (block_job_is_cancelled(job)) { + return; + } + + job->busy = false; + if (!block_job_should_pause(job)) { + co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); + } + job->busy = true; + + block_job_pause_point(job); +} + +void block_job_yield(BlockJob *job) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (block_job_is_cancelled(job)) { + return; + } + + job->busy = false; + if (!block_job_should_pause(job)) { + qemu_coroutine_yield(); + } + job->busy = true; + + block_job_pause_point(job); +} + void block_job_event_ready(BlockJob *job) { job->ready = true; From 2caf63a9039f245ef778833dfc87f0c58adba47b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:05 +0200 Subject: [PATCH 06/12] blockjob: move iostatus reset inside block_job_user_resume Outside blockjob.c, the block_job_iostatus_reset function is used once in the monitor and once in BlockBackend. When we introduce the block job mutex, block_job_iostatus_reset's client is going to be the block layer (for which blockjob.c will take the block job mutex) rather than the monitor (which will take the block job mutex by itself). The monitor's call to block_job_iostatus_reset from the monitor comes just before the sole call to block_job_user_resume, so reset the iostatus directly from block_job_iostatus_reset. This will avoid the need to introduce separate block_job_iostatus_reset and block_job_iostatus_reset_locked APIs. After making this change, move the function together with the others that were moved in the previous patch. Reviewed-by: Stefan Hajnoczi Reviewed-by: John Snow Signed-off-by: Paolo Bonzini Reviewed-by: Jeff Cody Message-id: 20170508141310.8674-7-pbonzini@redhat.com Signed-off-by: Jeff Cody --- blockdev.c | 1 - blockjob.c | 11 ++++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/blockdev.c b/blockdev.c index c63f4e82c7..892d768574 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3715,7 +3715,6 @@ void qmp_block_job_resume(const char *device, Error **errp) } trace_qmp_block_job_resume(job); - block_job_iostatus_reset(job); block_job_user_resume(job); aio_context_release(aio_context); } diff --git a/blockjob.c b/blockjob.c index a8ecb41aef..cd1f4c0712 100644 --- a/blockjob.c +++ b/blockjob.c @@ -412,6 +412,7 @@ void block_job_user_resume(BlockJob *job) { if (job && job->user_paused && job->pause_count > 0) { job->user_paused = false; + block_job_iostatus_reset(job); block_job_resume(job); } } @@ -427,11 +428,6 @@ void block_job_cancel(BlockJob *job) } } -void block_job_iostatus_reset(BlockJob *job) -{ - job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; -} - static int block_job_finish_sync(BlockJob *job, void (*finish)(BlockJob *, Error **errp), Error **errp) @@ -767,6 +763,11 @@ void block_job_yield(BlockJob *job) block_job_pause_point(job); } +void block_job_iostatus_reset(BlockJob *job) +{ + job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; +} + void block_job_event_ready(BlockJob *job) { job->ready = true; From 4c241cf5d6ca06c682e033cf8b327b63b1f4b784 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:06 +0200 Subject: [PATCH 07/12] blockjob: introduce block_job_cancel_async, check iostatus invariants The new functions helps respecting the invariant that the coroutine is entered with false user_resume, zero pause count and no error recorded in the iostatus. Resetting the iostatus is now common to all of block_job_cancel_async, block_job_user_resume and block_job_iostatus_reset, albeit with slight differences: - block_job_cancel_async resets the iostatus, and resumes the job if there was an error, but the coroutine is not restarted immediately. For example the caller may continue with a call to block_job_finish_sync. - block_job_user_resume resets the iostatus. It wants to resume the job unconditionally, even if there was no error. - block_job_iostatus_reset doesn't resume the job at all. Maybe that's a bug but it should be fixed separately. block_job_iostatus_reset does the least common denominator, so add some checking but otherwise leave it as the entry point for resetting the iostatus. Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini Message-id: 20170508141310.8674-8-pbonzini@redhat.com Signed-off-by: Jeff Cody --- blockjob.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/blockjob.c b/blockjob.c index cd1f4c0712..eae8fe7cbf 100644 --- a/blockjob.c +++ b/blockjob.c @@ -304,6 +304,19 @@ static void block_job_completed_single(BlockJob *job) block_job_unref(job); } +static void block_job_cancel_async(BlockJob *job) +{ + if (job->iostatus != BLOCK_DEVICE_IO_STATUS_OK) { + block_job_iostatus_reset(job); + } + if (job->user_paused) { + /* Do not call block_job_enter here, the caller will handle it. */ + job->user_paused = false; + job->pause_count--; + } + job->cancelled = true; +} + static void block_job_completed_txn_abort(BlockJob *job) { AioContext *ctx; @@ -328,7 +341,7 @@ static void block_job_completed_txn_abort(BlockJob *job) * them; this job, however, may or may not be cancelled, depending * on the caller, so leave it. */ if (other_job != job) { - other_job->cancelled = true; + block_job_cancel_async(other_job); } continue; } @@ -411,8 +424,8 @@ bool block_job_user_paused(BlockJob *job) void block_job_user_resume(BlockJob *job) { if (job && job->user_paused && job->pause_count > 0) { - job->user_paused = false; block_job_iostatus_reset(job); + job->user_paused = false; block_job_resume(job); } } @@ -420,8 +433,7 @@ void block_job_user_resume(BlockJob *job) void block_job_cancel(BlockJob *job) { if (block_job_started(job)) { - job->cancelled = true; - block_job_iostatus_reset(job); + block_job_cancel_async(job); block_job_enter(job); } else { block_job_completed(job, -ECANCELED); @@ -765,6 +777,10 @@ void block_job_yield(BlockJob *job) void block_job_iostatus_reset(BlockJob *job) { + if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + return; + } + assert(job->user_paused && job->pause_count > 0); job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; } From c8ab5c2dde2d9321552830350e60c83a059486d7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:07 +0200 Subject: [PATCH 08/12] blockjob: group BlockJob transaction functions together Yet another pure code movement patch, preparing for the next change. Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini Message-id: 20170508141310.8674-9-pbonzini@redhat.com Signed-off-by: Jeff Cody --- blockjob.c | 128 ++++++++++++++++++++++++++--------------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/blockjob.c b/blockjob.c index eae8fe7cbf..c6ae07d3f4 100644 --- a/blockjob.c +++ b/blockjob.c @@ -91,6 +91,39 @@ BlockJob *block_job_get(const char *id) return NULL; } +BlockJobTxn *block_job_txn_new(void) +{ + BlockJobTxn *txn = g_new0(BlockJobTxn, 1); + QLIST_INIT(&txn->jobs); + txn->refcnt = 1; + return txn; +} + +static void block_job_txn_ref(BlockJobTxn *txn) +{ + txn->refcnt++; +} + +void block_job_txn_unref(BlockJobTxn *txn) +{ + if (txn && --txn->refcnt == 0) { + g_free(txn); + } +} + +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job) +{ + if (!txn) { + return; + } + + assert(!job->txn); + job->txn = txn; + + QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); + block_job_txn_ref(txn); +} + static void block_job_pause(BlockJob *job) { job->pause_count++; @@ -317,6 +350,37 @@ static void block_job_cancel_async(BlockJob *job) job->cancelled = true; } +static int block_job_finish_sync(BlockJob *job, + void (*finish)(BlockJob *, Error **errp), + Error **errp) +{ + Error *local_err = NULL; + int ret; + + assert(blk_bs(job->blk)->job == job); + + block_job_ref(job); + + finish(job, &local_err); + if (local_err) { + error_propagate(errp, local_err); + block_job_unref(job); + return -EBUSY; + } + /* block_job_drain calls block_job_enter, and it should be enough to + * induce progress until the job completes or moves to the main thread. + */ + while (!job->deferred_to_main_loop && !job->completed) { + block_job_drain(job); + } + while (!job->completed) { + aio_poll(qemu_get_aio_context(), true); + } + ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret; + block_job_unref(job); + return ret; +} + static void block_job_completed_txn_abort(BlockJob *job) { AioContext *ctx; @@ -440,37 +504,6 @@ void block_job_cancel(BlockJob *job) } } -static int block_job_finish_sync(BlockJob *job, - void (*finish)(BlockJob *, Error **errp), - Error **errp) -{ - Error *local_err = NULL; - int ret; - - assert(blk_bs(job->blk)->job == job); - - block_job_ref(job); - - finish(job, &local_err); - if (local_err) { - error_propagate(errp, local_err); - block_job_unref(job); - return -EBUSY; - } - /* block_job_drain calls block_job_enter, and it should be enough to - * induce progress until the job completes or moves to the main thread. - */ - while (!job->deferred_to_main_loop && !job->completed) { - block_job_drain(job); - } - while (!job->completed) { - aio_poll(qemu_get_aio_context(), true); - } - ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret; - block_job_unref(job); - return ret; -} - /* A wrapper around block_job_cancel() taking an Error ** parameter so it may be * used with block_job_finish_sync() without the need for (rather nasty) * function pointer casts there. */ @@ -883,36 +916,3 @@ void block_job_defer_to_main_loop(BlockJob *job, aio_bh_schedule_oneshot(qemu_get_aio_context(), block_job_defer_to_main_loop_bh, data); } - -BlockJobTxn *block_job_txn_new(void) -{ - BlockJobTxn *txn = g_new0(BlockJobTxn, 1); - QLIST_INIT(&txn->jobs); - txn->refcnt = 1; - return txn; -} - -static void block_job_txn_ref(BlockJobTxn *txn) -{ - txn->refcnt++; -} - -void block_job_txn_unref(BlockJobTxn *txn) -{ - if (txn && --txn->refcnt == 0) { - g_free(txn); - } -} - -void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job) -{ - if (!txn) { - return; - } - - assert(!job->txn); - job->txn = txn; - - QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); - block_job_txn_ref(txn); -} From 7e74a73499059263212048ad989facae5500d230 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:08 +0200 Subject: [PATCH 09/12] blockjob: strengthen a bit test-blockjob-txn Unlike test-blockjob-txn, QMP releases the reference to the transaction before the jobs finish. Thus, qemu-iotest 124 showed a failure while working on the next patch that the unit tests did not have. Make the test a little nastier. Reviewed-by: Stefan Hajnoczi Reviewed-by: John Snow Signed-off-by: Paolo Bonzini Message-id: 20170508141310.8674-10-pbonzini@redhat.com Signed-off-by: Jeff Cody --- tests/test-blockjob-txn.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index 0f80194e85..c77343fc04 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -167,6 +167,11 @@ static void test_pair_jobs(int expected1, int expected2) block_job_start(job1); block_job_start(job2); + /* Release our reference now to trigger as many nice + * use-after-free bugs as possible. + */ + block_job_txn_unref(txn); + if (expected1 == -ECANCELED) { block_job_cancel(job1); } @@ -187,8 +192,6 @@ static void test_pair_jobs(int expected1, int expected2) g_assert_cmpint(result1, ==, expected1); g_assert_cmpint(result2, ==, expected2); - - block_job_txn_unref(txn); } static void test_pair_jobs_success(void) From 4fb588e95bacb770746cc794ef165fd71a4d27ea Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:09 +0200 Subject: [PATCH 10/12] blockjob: reorganize block_job_completed_txn_abort This splits the part that touches job states from the part that invokes callbacks. It will make the code simpler to understand once job states will be protected by a different mutex than the AioContext lock. Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini Message-id: 20170508141310.8674-11-pbonzini@redhat.com Signed-off-by: Jeff Cody --- blockjob.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/blockjob.c b/blockjob.c index c6ae07d3f4..9a97c73d77 100644 --- a/blockjob.c +++ b/blockjob.c @@ -300,6 +300,8 @@ void block_job_start(BlockJob *job) static void block_job_completed_single(BlockJob *job) { + assert(job->completed); + if (!job->ret) { if (job->driver->commit) { job->driver->commit(job); @@ -361,7 +363,9 @@ static int block_job_finish_sync(BlockJob *job, block_job_ref(job); - finish(job, &local_err); + if (finish) { + finish(job, &local_err); + } if (local_err) { error_propagate(errp, local_err); block_job_unref(job); @@ -385,7 +389,7 @@ static void block_job_completed_txn_abort(BlockJob *job) { AioContext *ctx; BlockJobTxn *txn = job->txn; - BlockJob *other_job, *next; + BlockJob *other_job; if (txn->aborting) { /* @@ -394,29 +398,34 @@ static void block_job_completed_txn_abort(BlockJob *job) return; } txn->aborting = true; + block_job_txn_ref(txn); + /* We are the first failed job. Cancel other jobs. */ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { ctx = blk_get_aio_context(other_job->blk); aio_context_acquire(ctx); } + + /* Other jobs are effectively cancelled by us, set the status for + * them; this job, however, may or may not be cancelled, depending + * on the caller, so leave it. */ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { - if (other_job == job || other_job->completed) { - /* Other jobs are "effectively" cancelled by us, set the status for - * them; this job, however, may or may not be cancelled, depending - * on the caller, so leave it. */ - if (other_job != job) { - block_job_cancel_async(other_job); - } - continue; + if (other_job != job) { + block_job_cancel_async(other_job); } - block_job_cancel_sync(other_job); - assert(other_job->completed); } - QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { + while (!QLIST_EMPTY(&txn->jobs)) { + other_job = QLIST_FIRST(&txn->jobs); ctx = blk_get_aio_context(other_job->blk); + if (!other_job->completed) { + assert(other_job->cancelled); + block_job_finish_sync(other_job, NULL, NULL); + } block_job_completed_single(other_job); aio_context_release(ctx); } + + block_job_txn_unref(txn); } static void block_job_completed_txn_success(BlockJob *job) From eb05e011e248c6fb6baee295e14fd206e136028c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 May 2017 16:13:10 +0200 Subject: [PATCH 11/12] blockjob: use deferred_to_main_loop to indicate the coroutine has ended All block jobs are using block_job_defer_to_main_loop as the final step just before the coroutine terminates. At this point, block_job_enter should do nothing, but currently it restarts the freed coroutine. Now, the job->co states should probably be changed to an enum (e.g. BEFORE_START, STARTED, YIELDED, COMPLETED) subsuming block_job_started, job->deferred_to_main_loop and job->busy. For now, this patch eliminates the problematic reenter by removing the reset of job->deferred_to_main_loop (which served no purpose, as far as I could see) and checking the flag in block_job_enter. Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini Message-id: 20170508141310.8674-12-pbonzini@redhat.com Signed-off-by: Jeff Cody --- blockjob.c | 10 ++++++++-- include/block/blockjob_int.h | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/blockjob.c b/blockjob.c index 9a97c73d77..a0d7e29b83 100644 --- a/blockjob.c +++ b/blockjob.c @@ -771,7 +771,14 @@ void block_job_resume_all(void) void block_job_enter(BlockJob *job) { - if (job->co && !job->busy) { + if (!block_job_started(job)) { + return; + } + if (job->deferred_to_main_loop) { + return; + } + + if (!job->busy) { bdrv_coroutine_enter(blk_bs(job->blk), job->co); } } @@ -899,7 +906,6 @@ static void block_job_defer_to_main_loop_bh(void *opaque) aio_context_acquire(aio_context); } - data->job->deferred_to_main_loop = false; data->fn(data->job, data->opaque); if (aio_context != data->aio_context) { diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 4f2d2ac75a..f13ad05c0d 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -241,7 +241,8 @@ typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque); * @fn: The function to run in the main loop * @opaque: The opaque value that is passed to @fn * - * Execute a given function in the main loop with the BlockDriverState + * This function must be called by the main job coroutine just before it + * returns. @fn is executed in the main loop with the BlockDriverState * AioContext acquired. Block jobs must call bdrv_unref(), bdrv_close(), and * anything that uses bdrv_drain_all() in the main loop. * From 223a23c198787328ae75bc65d84edf5fde33c0b6 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Tue, 23 May 2017 13:27:50 -0400 Subject: [PATCH 12/12] block/gluster: glfs_lseek() workaround On current released versions of glusterfs, glfs_lseek() will sometimes return invalid values for SEEK_DATA or SEEK_HOLE. For SEEK_DATA and SEEK_HOLE, the returned value should be >= the passed offset, or < 0 in the case of error: LSEEK(2): off_t lseek(int fd, off_t offset, int whence); [...] SEEK_HOLE Adjust the file offset to the next hole in the file greater than or equal to offset. If offset points into the middle of a hole, then the file offset is set to offset. If there is no hole past offset, then the file offset is adjusted to the end of the file (i.e., there is an implicit hole at the end of any file). [...] RETURN VALUE Upon successful completion, lseek() returns the resulting offset location as measured in bytes from the beginning of the file. On error, the value (off_t) -1 is returned and errno is set to indicate the error However, occasionally glfs_lseek() for SEEK_HOLE/DATA will return a value less than the passed offset, yet greater than zero. For instance, here are example values observed from this call: offs = glfs_lseek(s->fd, start, SEEK_HOLE); if (offs < 0) { return -errno; /* D1 and (H3 or H4) */ } start == 7608336384 offs == 7607877632 This causes QEMU to abort on the assert test. When this value is returned, errno is also 0. This is a reported and known bug to glusterfs: https://bugzilla.redhat.com/show_bug.cgi?id=1425293 Although this is being fixed in gluster, we still should work around it in QEMU, given that multiple released versions of gluster behave this way. This patch treats the return case of (offs < start) the same as if an error value other than ENXIO is returned; we will assume we learned nothing, and there are no holes in the file. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Reviewed-by: Niels de Vos Message-id: 87c0140e9407c08f6e74b04131b610f2e27c014c.1495560397.git.jcody@redhat.com Signed-off-by: Jeff Cody --- block/gluster.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 7c76cd0988..8ba3bcca0b 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1275,7 +1275,14 @@ static int find_allocation(BlockDriverState *bs, off_t start, if (offs < 0) { return -errno; /* D3 or D4 */ } - assert(offs >= start); + + if (offs < start) { + /* This is not a valid return by lseek(). We are safe to just return + * -EIO in this case, and we'll treat it like D4. Unfortunately some + * versions of gluster server will return offs < start, so an assert + * here will unnecessarily abort QEMU. */ + return -EIO; + } if (offs > start) { /* D2: in hole, next data at offs */ @@ -1307,7 +1314,14 @@ static int find_allocation(BlockDriverState *bs, off_t start, if (offs < 0) { return -errno; /* D1 and (H3 or H4) */ } - assert(offs >= start); + + if (offs < start) { + /* This is not a valid return by lseek(). We are safe to just return + * -EIO in this case, and we'll treat it like H4. Unfortunately some + * versions of gluster server will return offs < start, so an assert + * here will unnecessarily abort QEMU. */ + return -EIO; + } if (offs > start) { /*