Block patches for 2.1.0-rc2 (v2)

-----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQIcBAABAgAGBQJTw6scAAoJEH8JsnLIjy/WWk8QAMmThVQhJqajBbriVJWfB5w4 A4ZaRpk+NsDUZhzbsVBJ06ZdlQSX76JOT17V1hDFe7wcu1Aq8XL2b0PzV4iDNRgJ QPjW7vKAfFq/ANziGfgfAhpeQkq8o5+R8OepmdqJXzIDCjg+nDtJVdsc7G8DIWOx 1ssEqW8zaRuZMck61mo3hpdCCA+m+3HTHhcDEf/lzS69XHi2+BZ6ATVh3zShJqxy +68hDHdPNMTWESkN5MBQAo82flbdmNjqpD5SYYuJsoCOV+Tb5jLUGbED/VM1LqCf 8ukXlu4TO0u3ZmO+3XeQJiNsBSQEGOp9/9gOYj8J7AaZcZzC4cON3RmiYLZLgapj zdKIvgCxuFzyFheJYPCwpr3483w6/mh4uMzasZ+jHETqieAyvy1L860FNdWQaXoX K96m/1yIaQ2NOogcWrxZZ4Jt/diKh+NWynBFm8MZON6MK46FLiCcma2ZedoX6dNc R+Ul0qiYMo5B9fX05uhf15dU8cmVQuVFRo2ftIIqxZDY9IPjJjrJPw9EjHajGIJb MpU25NRHCdf0BscgYufuf1W9llasl0fbAd3SIA4FccTFdAeDwu5SQXHTodhu64hh 7gf23N7Let/Gnucxx7gOTi1Jz3uR8V7MbIYRgBBvyRqhNRRRjYDwpg2c8guCzM2F VObGeTRdNa9QhTXqBPdQ =YxHy -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging Block patches for 2.1.0-rc2 (v2) # gpg: Signature made Mon 14 Jul 2014 11:04:12 BST using RSA key ID C88F2FD6 # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" * remotes/kevin/tags/for-upstream: (22 commits) ide: Treat read/write beyond end as invalid virtio-blk: Treat read/write beyond end as invalid virtio-blk: Bypass error action and I/O accounting on invalid r/w virtio-blk: Factor common checks out of virtio_blk_handle_read/write() dma-helpers: Fix too long qiov qtest: fix vhost-user-test compilation with old GLib tests: Fix unterminated string output visitor enum human string AioContext: do not rely on aio_poll(ctx, true) result to end a loop virtio-blk: embed VirtQueueElement in VirtIOBlockReq virtio-blk: avoid g_slice_new0() for VirtIOBlockReq and VirtQueueElement dataplane: do not free VirtQueueElement in vring_push() virtio-blk: avoid dataplane VirtIOBlockReq early free block: Assert qiov length matches request length qed: Make qiov match request size until backing file EOF qcow2: Make qiov match request size until backing file EOF block: Make qiov match the request size until EOF AioContext: speed up aio_notify test-aio: fix GSource-based timer test block: drop aio functions that operate on the main AioContext block: prefer aio_poll to qemu_aio_wait ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2014-07-14 13:09:29 +01:00 · 2014-07-14 13:09:29 +01:00 · 7a6d04e73f
parent c15a34eda0 58ac321135
commit 7a6d04e73f
30 changed files with 699 additions and 167 deletions
--- a/aio-posix.c
+++ b/aio-posix.c
@ -125,7 +125,7 @@ static bool aio_dispatch(AioContext *ctx)
    bool progress = false;
    /*
-     * We have to walk very carefully in case qemu_aio_set_fd_handler is
+     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
@ -175,27 +175,56 @@ static bool aio_dispatch(AioContext *ctx)
 bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
    bool was_dispatching;
    int ret;
    bool progress;
    was_dispatching = ctx->dispatching;
    progress = false;
    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This happens
     * in two cases:
     *
     * 1) when aio_poll is called with blocking == false
     *
     * 2) when we are called after poll().  If we are called before
     *    poll(), bottom halves will not be re-evaluated and we need
     *    aio_notify() if blocking == true.
     *
     * The first aio_dispatch() only does something when AioContext is
     * running as a GSource, and in that case aio_poll is used only
     * with blocking == false, so this optimization is already quite
     * effective.  However, the code is ugly and should be restructured
     * to have a single aio_dispatch() call.  To do this, we need to
     * reorganize aio_poll into a prepare/poll/dispatch model like
     * glib's.
     *
     * If we're in a nested event loop, ctx->dispatching might be true.
     * In that case we can restore it just before returning, but we
     * have to clear it now.
     */
    aio_set_dispatching(ctx, !blocking);
    /*
     * If there are callbacks left that have been queued, we need to call them.
     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for qemu_aio_wait loops).
+     * does not need a complete flush (as is the case for aio_poll loops).
     */
    if (aio_bh_poll(ctx)) {
        blocking = false;
        progress = true;
    }
    /* Re-evaluate condition (1) above.  */
    aio_set_dispatching(ctx, !blocking);
    if (aio_dispatch(ctx)) {
        progress = true;
    }
    if (progress && !blocking) {
-        return true;
+        goto out;
    }
    ctx->walking_handlers++;
@ -234,9 +263,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
    }
    /* Run dispatch even if there were no readable fds to run timers */
    aio_set_dispatching(ctx, true);
    if (aio_dispatch(ctx)) {
        progress = true;
    }
 out:
    aio_set_dispatching(ctx, was_dispatching);
    return progress;
 }
--- a/aio-win32.c
+++ b/aio-win32.c
@ -102,7 +102,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
    /*
     * If there are callbacks left that have been queued, we need to call then.
     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for qemu_aio_wait loops).
+     * does not need a complete flush (as is the case for aio_poll loops).
     */
    if (aio_bh_poll(ctx)) {
        blocking = false;
@ -115,7 +115,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
    /*
     * Then dispatch any pending callbacks from the GSource.
     *
-     * We have to walk very carefully in case qemu_aio_set_fd_handler is
+     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
@ -177,7 +177,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
        blocking = false;
        /* we have to walk very carefully in case
-         * qemu_aio_set_fd_handler is called while we're walking */
+         * aio_set_fd_handler is called while we're walking */
        node = QLIST_FIRST(&ctx->aio_handlers);
        while (node) {
            AioHandler *tmp;
--- a/async.c
+++ b/async.c
@ -26,6 +26,7 @@
 #include "block/aio.h"
 #include "block/thread-pool.h"
 #include "qemu/main-loop.h"
 #include "qemu/atomic.h"
 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@ -247,9 +248,25 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx)
    return ctx->thread_pool;
 }
 void aio_set_dispatching(AioContext *ctx, bool dispatching)
 {
    ctx->dispatching = dispatching;
    if (!dispatching) {
        /* Write ctx->dispatching before reading e.g. bh->scheduled.
         * Optimization: this is only needed when we're entering the "unsafe"
         * phase where other threads must call event_notifier_set.
         */
        smp_mb();
    }
 }
 void aio_notify(AioContext *ctx)
 {
-    event_notifier_set(&ctx->notifier);
+    /* Write e.g. bh->scheduled before reading ctx->dispatching.  */
    smp_mb();
    if (!ctx->dispatching) {
        event_notifier_set(&ctx->notifier);
    }
 }
 static void aio_timerlist_notify(void *opaque)
--- a/block.c
+++ b/block.c
@ -471,7 +471,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
-            qemu_aio_wait();
+            aio_poll(qemu_get_aio_context(), true);
        }
    }
@ -3010,6 +3010,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);
    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
@ -3054,8 +3055,20 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                  align >> BDRV_SECTOR_BITS);
        if (max_nb_sectors > 0) {
-            ret = drv->bdrv_co_readv(bs, sector_num,
+            QEMUIOVector local_qiov;
-                                     MIN(nb_sectors, max_nb_sectors), qiov);
+            size_t local_sectors;
            max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
            local_sectors = MIN(max_nb_sectors, nb_sectors);
            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, 0,
                              local_sectors * BDRV_SECTOR_SIZE);
            ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
                                     &local_qiov);
            qemu_iovec_destroy(&local_qiov);
        } else {
            ret = 0;
        }
@ -3267,6 +3280,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);
    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
@ -4040,7 +4054,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
    if (ret < 0) {
        return ret;
    }
-    return (ret & BDRV_BLOCK_ALLOCATED);
+    return !!(ret & BDRV_BLOCK_ALLOCATED);
 }
 /*
--- a/block/backup.c
+++ b/block/backup.c
@ -307,7 +307,7 @@ static void coroutine_fn backup_run(void *opaque)
                                BACKUP_SECTORS_PER_CLUSTER - i, &n);
                    i += n;
-                    if (alloced == 1) {
+                    if (alloced == 1 || n == 0) {
                        break;
                    }
                }
--- a/block/qcow2.c
+++ b/block/qcow2.c
@ -1020,11 +1020,20 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
                    sector_num, cur_nr_sectors);
                if (n1 > 0) {
                    QEMUIOVector local_qiov;
                    qemu_iovec_init(&local_qiov, hd_qiov.niov);
                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
                                      n1 * BDRV_SECTOR_SIZE);
                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                    qemu_co_mutex_unlock(&s->lock);
                    ret = bdrv_co_readv(bs->backing_hd, sector_num,
-                                        n1, &hd_qiov);
+                                        n1, &local_qiov);
                    qemu_co_mutex_lock(&s->lock);
                    qemu_iovec_destroy(&local_qiov);
                    if (ret < 0) {
                        goto fail;
                    }
--- a/block/qed.c
+++ b/block/qed.c
@ -761,17 +761,19 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
 /**
 * Read from the backing file or zero-fill if no backing file
 *
- * @s:          QED state
+ * @s:              QED state
- * @pos:        Byte position in device
+ * @pos:            Byte position in device
- * @qiov:       Destination I/O vector
+ * @qiov:           Destination I/O vector
- * @cb:         Completion function
+ * @backing_qiov:   Possibly shortened copy of qiov, to be allocated here
- * @opaque:     User data for completion function
+ * @cb:             Completion function
 * @opaque:         User data for completion function
 *
 * This function reads qiov->size bytes starting at pos from the backing file.
 * If there is no backing file then zeroes are read.
 */
 static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
                                  QEMUIOVector *qiov,
                                  QEMUIOVector **backing_qiov,
                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
    uint64_t backing_length = 0;
@ -804,15 +806,21 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
    /* If the read straddles the end of the backing file, shorten it */
    size = MIN((uint64_t)backing_length - pos, qiov->size);
    assert(*backing_qiov == NULL);
    *backing_qiov = g_new(QEMUIOVector, 1);
    qemu_iovec_init(*backing_qiov, qiov->niov);
    qemu_iovec_concat(*backing_qiov, qiov, 0, size);
    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
    bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
-                   qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
+                   *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
 }
 typedef struct {
    GenericCB gencb;
    BDRVQEDState *s;
    QEMUIOVector qiov;
    QEMUIOVector *backing_qiov;
    struct iovec iov;
    uint64_t offset;
 } CopyFromBackingFileCB;
@ -829,6 +837,12 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
    CopyFromBackingFileCB *copy_cb = opaque;
    BDRVQEDState *s = copy_cb->s;
    if (copy_cb->backing_qiov) {
        qemu_iovec_destroy(copy_cb->backing_qiov);
        g_free(copy_cb->backing_qiov);
        copy_cb->backing_qiov = NULL;
    }
    if (ret) {
        qed_copy_from_backing_file_cb(copy_cb, ret);
        return;
@ -866,11 +880,12 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
    copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
    copy_cb->s = s;
    copy_cb->offset = offset;
    copy_cb->backing_qiov = NULL;
    copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
    copy_cb->iov.iov_len = len;
    qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
-    qed_read_backing_file(s, pos, &copy_cb->qiov,
+    qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov,
                          qed_copy_from_backing_file_write, copy_cb);
 }
@ -1313,7 +1328,7 @@ static void qed_aio_read_data(void *opaque, int ret,
        return;
    } else if (ret != QED_CLUSTER_FOUND) {
        qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                              qed_aio_next_io, acb);
+                              &acb->backing_qiov, qed_aio_next_io, acb);
        return;
    }
@ -1339,6 +1354,12 @@ static void qed_aio_next_io(void *opaque, int ret)
    trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
    if (acb->backing_qiov) {
        qemu_iovec_destroy(acb->backing_qiov);
        g_free(acb->backing_qiov);
        acb->backing_qiov = NULL;
    }
    /* Handle I/O error */
    if (ret) {
        qed_aio_complete(acb, ret);
@ -1378,6 +1399,7 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
    acb->qiov_offset = 0;
    acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
    acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
    acb->backing_qiov = NULL;
    acb->request.l2_table = NULL;
    qemu_iovec_init(&acb->cur_qiov, qiov->niov);
--- a/block/qed.h
+++ b/block/qed.h
@ -142,6 +142,7 @@ typedef struct QEDAIOCB {
    /* Current cluster scatter-gather list */
    QEMUIOVector cur_qiov;
    QEMUIOVector *backing_qiov;
    uint64_t cur_pos;               /* position on block device, in bytes */
    uint64_t cur_cluster;           /* cluster offset in image file */
    unsigned int cur_nclusters;     /* number of clusters being accessed */
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@ -790,6 +790,7 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
            p += aiocb->aio_iov[i].iov_len;
        }
        assert(p - buf == aiocb->aio_nbytes);
    }
    nbytes = handle_aiocb_rw_linear(aiocb, buf);
@ -804,9 +805,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
                copy = aiocb->aio_iov[i].iov_len;
            }
            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
            assert(count >= copy);
            p     += copy;
            count -= copy;
        }
        assert(count == 0);
    }
    qemu_vfree(buf);
@ -993,12 +996,14 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
    acb->aio_type = type;
    acb->aio_fildes = fd;
    acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
    acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
    if (qiov) {
        acb->aio_iov = qiov->iov;
        acb->aio_niov = qiov->niov;
        assert(qiov->size == acb->aio_nbytes);
    }
    acb->aio_nbytes = nb_sectors * 512;
    acb->aio_offset = sector_num * 512;
    trace_paio_submit_co(sector_num, nb_sectors, type);
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
@ -1016,12 +1021,14 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
    acb->aio_type = type;
    acb->aio_fildes = fd;
    acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
    acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
    if (qiov) {
        acb->aio_iov = qiov->iov;
        acb->aio_niov = qiov->niov;
        assert(qiov->size == acb->aio_nbytes);
    }
    acb->aio_nbytes = nb_sectors * 512;
    acb->aio_offset = sector_num * 512;
    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
--- a/blockjob.c
+++ b/blockjob.c
@ -187,7 +187,7 @@ int block_job_cancel_sync(BlockJob *job)
    job->opaque = &data;
    block_job_cancel(job);
    while (data.ret == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(bs), true);
    }
    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
 }
--- a/dma-helpers.c
+++ b/dma-helpers.c
@ -170,6 +170,10 @@ static void dma_bdrv_cb(void *opaque, int ret)
        return;
    }
    if (dbs->iov.size & ~BDRV_SECTOR_MASK) {
        qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
    }
    dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
                            dbs->iov.size / 512, dma_bdrv_cb, dbs);
    assert(dbs->acb);
--- a/docs/aio_notify.promela
+++ b/docs/aio_notify.promela
@ -0,0 +1,104 @@
 /*
 * This model describes the interaction between aio_set_dispatching()
 * and aio_notify().
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This file is in the public domain.  If you really want a license,
 * the WTFPL will do.
 *
 * To simulate it:
 *     spin -p docs/aio_notify.promela
 *
 * To verify it:
 *     spin -a docs/aio_notify.promela
 *     gcc -O2 pan.c
 *     ./a.out -a
 */
 #define MAX   4
 #define LAST  (1 << (MAX - 1))
 #define FINAL ((LAST << 1) - 1)
 bool dispatching;
 bool event;
 int req, done;
 active proctype waiter()
 {
     int fetch, blocking;
     do
        :: done != FINAL -> {
            // Computing "blocking" is separate from execution of the
            // "bottom half"
            blocking = (req == 0);
            // This is our "bottom half"
            atomic { fetch = req; req = 0; }
            done = done | fetch;
            // Wait for a nudge from the other side
            do
                :: event == 1 -> { event = 0; break; }
                :: !blocking  -> break;
            od;
            dispatching = 1;
            // If you are simulating this model, you may want to add
            // something like this here:
            //
            //      int foo; foo++; foo++; foo++;
            //
            // This only wastes some time and makes it more likely
            // that the notifier process hits the "fast path".
            dispatching = 0;
        }
        :: else -> break;
    od
 }
 active proctype notifier()
 {
    int next = 1;
    int sets = 0;
    do
        :: next <= LAST -> {
            // generate a request
            req = req | next;
            next = next << 1;
            // aio_notify
            if
                :: dispatching == 0 -> sets++; event = 1;
                :: else             -> skip;
            fi;
            // Test both synchronous and asynchronous delivery
            if
                :: 1 -> do
                            :: req == 0 -> break;
                        od;
                :: 1 -> skip;
            fi;
        }
        :: else -> break;
    od;
    printf("Skipped %d event_notifier_set\n", MAX - sets);
 }
 #define p (done == FINAL)
 never  {
    do
        :: 1                      // after an arbitrarily long prefix
        :: p -> break             // p becomes true
    od;
    do
        :: !p -> accept: break    // it then must remains true forever after
    od
 }
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@ -65,43 +65,41 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
 {
    stb_p(&req->in->status, status);
-    vring_push(&req->dev->dataplane->vring, req->elem,
+    vring_push(&req->dev->dataplane->vring, &req->elem,
               req->qiov.size + sizeof(*req->in));
    notify_guest(req->dev->dataplane);
    g_slice_free(VirtIOBlockReq, req);
 }
 static void handle_notify(EventNotifier *e)
 {
    VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
                                           host_notifier);
-
+    VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
    VirtQueueElement *elem;
    VirtIOBlockReq *req;
    int ret;
    MultiReqBuffer mrb = {
        .num_writes = 0,
    };
    event_notifier_test_and_clear(&s->host_notifier);
    bdrv_io_plug(s->blk->conf.bs);
    for (;;) {
        MultiReqBuffer mrb = {
            .num_writes = 0,
        };
        int ret;
        /* Disable guest->host notifies to avoid unnecessary vmexits */
        vring_disable_notification(s->vdev, &s->vring);
        for (;;) {
-            ret = vring_pop(s->vdev, &s->vring, &elem);
+            VirtIOBlockReq *req = virtio_blk_alloc_request(vblk);
            ret = vring_pop(s->vdev, &s->vring, &req->elem);
            if (ret < 0) {
-                assert(elem == NULL);
+                virtio_blk_free_request(req);
                break; /* no more requests */
            }
-            trace_virtio_blk_data_plane_process_request(s, elem->out_num,
+            trace_virtio_blk_data_plane_process_request(s, req->elem.out_num,
-                                                        elem->in_num, elem->index);
+                                                        req->elem.in_num,
                                                        req->elem.index);
            req = g_slice_new(VirtIOBlockReq);
            req->dev = VIRTIO_BLK(s->vdev);
            req->elem = elem;
            virtio_blk_handle_request(req, &mrb);
        }
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@ -29,18 +29,18 @@
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
-static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
+VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
 {
-    VirtIOBlockReq *req = g_slice_new0(VirtIOBlockReq);
+    VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
    req->dev = s;
-    req->elem = g_slice_new0(VirtQueueElement);
+    req->qiov.size = 0;
    req->next = NULL;
    return req;
 }
-static void virtio_blk_free_request(VirtIOBlockReq *req)
+void virtio_blk_free_request(VirtIOBlockReq *req)
 {
    if (req) {
        g_slice_free(VirtQueueElement, req->elem);
        g_slice_free(VirtIOBlockReq, req);
    }
 }
@ -54,7 +54,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
    trace_virtio_blk_req_complete(req, status);
    stb_p(&req->in->status, status);
-    virtqueue_push(s->vq, req->elem, req->qiov.size + sizeof(*req->in));
+    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
    virtio_notify(vdev, s->vq);
 }
@ -119,7 +119,7 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
 {
    VirtIOBlockReq *req = virtio_blk_alloc_request(s);
-    if (!virtqueue_pop(s->vq, req->elem)) {
+    if (!virtqueue_pop(s->vq, &req->elem)) {
        virtio_blk_free_request(req);
        return NULL;
    }
@ -252,7 +252,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 {
    int status;
-    status = virtio_blk_handle_scsi_req(req->dev, req->elem);
+    status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
    virtio_blk_req_complete(req, status);
    virtio_blk_free_request(req);
 }
@ -288,6 +288,25 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
    bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
 }
 static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
                                     uint64_t sector, size_t size)
 {
    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
    uint64_t total_sectors;
    if (sector & dev->sector_mask) {
        return false;
    }
    if (size % dev->conf->logical_block_size) {
        return false;
    }
    bdrv_get_geometry(dev->bs, &total_sectors);
    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
        return false;
    }
    return true;
 }
 static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
    BlockRequest *blkreq;
@ -295,19 +314,16 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
    sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
    trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
-    if (sector & req->dev->sector_mask) {
+    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
-        virtio_blk_rw_complete(req, -EIO);
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-        return;
+        virtio_blk_free_request(req);
    }
    if (req->qiov.size % req->dev->conf->logical_block_size) {
        virtio_blk_rw_complete(req, -EIO);
        return;
    }
    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
    if (mrb->num_writes == 32) {
        virtio_submit_multiwrite(req->dev->bs, mrb);
    }
@ -329,18 +345,15 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
    sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
    trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
-    if (sector & req->dev->sector_mask) {
+    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
-        virtio_blk_rw_complete(req, -EIO);
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-        return;
+        virtio_blk_free_request(req);
    }
    if (req->qiov.size % req->dev->conf->logical_block_size) {
        virtio_blk_rw_complete(req, -EIO);
        return;
    }
    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
    bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
                   req->qiov.size / BDRV_SECTOR_SIZE,
                   virtio_blk_rw_complete, req);
@ -349,12 +362,12 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
 void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
    uint32_t type;
-    struct iovec *in_iov = req->elem->in_sg;
+    struct iovec *in_iov = req->elem.in_sg;
-    struct iovec *iov = req->elem->out_sg;
+    struct iovec *iov = req->elem.out_sg;
-    unsigned in_num = req->elem->in_num;
+    unsigned in_num = req->elem.in_num;
-    unsigned out_num = req->elem->out_num;
+    unsigned out_num = req->elem.out_num;
-    if (req->elem->out_num < 1 || req->elem->in_num < 1) {
+    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
        error_report("virtio-blk missing headers");
        exit(1);
    }
@ -391,19 +404,19 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
         * NB: per existing s/n string convention the string is
         * terminated by '\0' only when shorter than buffer.
         */
-        strncpy(req->elem->in_sg[0].iov_base,
+        strncpy(req->elem.in_sg[0].iov_base,
                s->blk.serial ? s->blk.serial : "",
-                MIN(req->elem->in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
+                MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
        virtio_blk_free_request(req);
    } else if (type & VIRTIO_BLK_T_OUT) {
-        qemu_iovec_init_external(&req->qiov, &req->elem->out_sg[1],
+        qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
-                                 req->elem->out_num - 1);
+                                 req->elem.out_num - 1);
        virtio_blk_handle_write(req, mrb);
    } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
        /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
-        qemu_iovec_init_external(&req->qiov, &req->elem->in_sg[0],
+        qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
-                                 req->elem->in_num - 1);
+                                 req->elem.in_num - 1);
        virtio_blk_handle_read(req);
    } else {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
@ -627,7 +640,7 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
    while (req) {
        qemu_put_sbyte(f, 1);
-        qemu_put_buffer(f, (unsigned char *)req->elem,
+        qemu_put_buffer(f, (unsigned char *)&req->elem,
                        sizeof(VirtQueueElement));
        req = req->next;
    }
@ -652,15 +665,15 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
    while (qemu_get_sbyte(f)) {
        VirtIOBlockReq *req = virtio_blk_alloc_request(s);
-        qemu_get_buffer(f, (unsigned char *)req->elem,
+        qemu_get_buffer(f, (unsigned char *)&req->elem,
                        sizeof(VirtQueueElement));
        req->next = s->rq;
        s->rq = req;
-        virtqueue_map_sg(req->elem->in_sg, req->elem->in_addr,
+        virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
-            req->elem->in_num, 1);
+            req->elem.in_num, 1);
-        virtqueue_map_sg(req->elem->out_sg, req->elem->out_addr,
+        virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
-            req->elem->out_num, 0);
+            req->elem.out_num, 0);
    }
    return 0;
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@ -499,6 +499,18 @@ static void ide_rw_error(IDEState *s) {
    ide_set_irq(s->bus);
 }
 static bool ide_sect_range_ok(IDEState *s,
                              uint64_t sector, uint64_t nb_sectors)
 {
    uint64_t total_sectors;
    bdrv_get_geometry(s->bs, &total_sectors);
    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
        return false;
    }
    return true;
 }
 static void ide_sector_read_cb(void *opaque, int ret)
 {
    IDEState *s = opaque;
@ -554,6 +566,11 @@ void ide_sector_read(IDEState *s)
    printf("sector=%" PRId64 "\n", sector_num);
 #endif
    if (!ide_sect_range_ok(s, sector_num, n)) {
        ide_rw_error(s);
        return;
    }
    s->iov.iov_base = s->io_buffer;
    s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
@ -671,6 +688,12 @@ void ide_dma_cb(void *opaque, int ret)
           sector_num, n, s->dma_cmd);
 #endif
    if (!ide_sect_range_ok(s, sector_num, n)) {
        dma_buf_commit(s);
        ide_dma_error(s);
        return;
    }
    switch (s->dma_cmd) {
    case IDE_DMA_READ:
        s->bus->dma->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num,
@ -790,6 +813,11 @@ void ide_sector_write(IDEState *s)
        n = s->req_nb_sectors;
    }
    if (!ide_sect_range_ok(s, sector_num, n)) {
        ide_rw_error(s);
        return;
    }
    s->iov.iov_base = s->io_buffer;
    s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@ -272,7 +272,7 @@ static int get_indirect(Vring *vring, VirtQueueElement *elem,
    return 0;
 }
-void vring_free_element(VirtQueueElement *elem)
+static void vring_unmap_element(VirtQueueElement *elem)
 {
    int i;
@ -287,8 +287,6 @@ void vring_free_element(VirtQueueElement *elem)
    for (i = 0; i < elem->in_num; i++) {
        vring_unmap(elem->in_sg[i].iov_base, true);
    }
    g_slice_free(VirtQueueElement, elem);
 }
 /* This looks in the virtqueue and for the first available buffer, and converts
@ -303,14 +301,16 @@ void vring_free_element(VirtQueueElement *elem)
 * Stolen from linux/drivers/vhost/vhost.c.
 */
 int vring_pop(VirtIODevice *vdev, Vring *vring,
-              VirtQueueElement **p_elem)
+              VirtQueueElement *elem)
 {
    struct vring_desc desc;
    unsigned int i, head, found = 0, num = vring->vr.num;
    uint16_t avail_idx, last_avail_idx;
    VirtQueueElement *elem = NULL;
    int ret;
    /* Initialize elem so it can be safely unmapped */
    elem->in_num = elem->out_num = 0;
    /* If there was a fatal error then refuse operation */
    if (vring->broken) {
        ret = -EFAULT;
@ -342,10 +342,8 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
     * the index we've seen. */
    head = vring->vr.avail->ring[last_avail_idx % num];
    elem = g_slice_new(VirtQueueElement);
    elem->index = head;
-    elem->in_num = elem->out_num = 0;
+
    /* If their number is silly, that's an error. */
    if (unlikely(head >= num)) {
        error_report("Guest says index %u > %u is available", head, num);
@ -393,7 +391,6 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
    /* On success, increment avail index. */
    vring->last_avail_idx++;
    *p_elem = elem;
    return head;
 out:
@ -401,10 +398,7 @@ out:
    if (ret == -EFAULT) {
        vring->broken = true;
    }
-    if (elem) {
+    vring_unmap_element(elem);
        vring_free_element(elem);
    }
    *p_elem = NULL;
    return ret;
 }
@ -418,7 +412,7 @@ void vring_push(Vring *vring, VirtQueueElement *elem, int len)
    unsigned int head = elem->index;
    uint16_t new;
-    vring_free_element(elem);
+    vring_unmap_element(elem);
    /* Don't touch vring if a fatal error occurred */
    if (vring->broken) {
--- a/include/block/aio.h
+++ b/include/block/aio.h
@ -60,8 +60,14 @@ struct AioContext {
     */
    int walking_handlers;
    /* Used to avoid unnecessary event_notifier_set calls in aio_notify.
     * Writes protected by lock or BQL, reads are lockless.
     */
    bool dispatching;
    /* lock to protect between bh's adders and deleter */
    QemuMutex bh_lock;
    /* Anchor of the list of Bottom Halves belonging to the context */
    struct QEMUBH *first_bh;
@ -83,6 +89,9 @@ struct AioContext {
    QEMUTimerListGroup tlg;
 };
 /* Used internally to synchronize aio_poll against qemu_bh_schedule.  */
 void aio_set_dispatching(AioContext *ctx, bool dispatching);
 /**
 * aio_context_new: Allocate a new AioContext.
 *
@ -205,9 +214,9 @@ bool aio_pending(AioContext *ctx);
 /* Progress in completing AIO work to occur.  This can issue new pending
 * aio as a result of executing I/O completion or bh callbacks.
 *
- * If there is no pending AIO operation or completion (bottom half),
+ * Return whether any progress was made by executing AIO or bottom half
- * return false.  If there are pending AIO operations of bottom halves,
+ * handlers.  If @blocking == true, this should always be true except
- * return true.
+ * if someone called aio_notify.
 *
 * If there are no pending bottom halves, but there are pending AIO
 * operations, it may not be possible to make any progress without
@ -220,7 +229,7 @@ bool aio_poll(AioContext *ctx, bool blocking);
 #ifdef CONFIG_POSIX
 /* Register a file descriptor and associated callbacks.  Behaves very similarly
 * to qemu_set_fd_handler2.  Unlike qemu_set_fd_handler2, these callbacks will
- * be invoked when using qemu_aio_wait().
+ * be invoked when using aio_poll().
 *
 * Code that invokes AIO completion functions should rely on this function
 * instead of qemu_set_fd_handler[2].
@ -234,7 +243,7 @@ void aio_set_fd_handler(AioContext *ctx,
 /* Register an event notifier and associated callbacks.  Behaves very similarly
 * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
- * will be invoked when using qemu_aio_wait().
+ * will be invoked when using aio_poll().
 *
 * Code that invokes AIO completion functions should rely on this function
 * instead of event_notifier_set_handler.
@ -251,19 +260,6 @@ GSource *aio_get_g_source(AioContext *ctx);
 /* Return the ThreadPool bound to this AioContext */
 struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
 /* Functions to operate on the main QEMU AioContext.  */
 bool qemu_aio_wait(void);
 void qemu_aio_set_event_notifier(EventNotifier *notifier,
                                 EventNotifierHandler *io_read);
 #ifdef CONFIG_POSIX
 void qemu_aio_set_fd_handler(int fd,
                             IOHandler *io_read,
                             IOHandler *io_write,
                             void *opaque);
 #endif
 /**
 * aio_timer_new:
 * @ctx: the aio context
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@ -74,7 +74,7 @@ struct BlockJob {
     * Set to true if the job should cancel itself.  The flag must
     * always be tested just before toggling the busy flag from false
     * to true.  After a job has been cancelled, it should only yield
-     * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
+     * if #aio_poll will ("sooner or later") reenter the coroutine.
     */
    bool cancelled;
@ -87,7 +87,7 @@ struct BlockJob {
    /**
     * Set to false by the job while it is in a quiescent state, where
     * no I/O is pending and the job has yielded on any condition
-     * that is not detected by #qemu_aio_wait, such as a timer.
+     * that is not detected by #aio_poll, such as a timer.
     */
    bool busy;
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@ -212,7 +212,7 @@ void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns);
 * Yield the coroutine for a given duration
 *
 * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
- * resumed when using qemu_aio_wait().
+ * resumed when using aio_poll().
 */
 void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
                                  int64_t ns);
--- a/include/hw/virtio/dataplane/vring.h
+++ b/include/hw/virtio/dataplane/vring.h
@ -53,8 +53,7 @@ void vring_teardown(Vring *vring, VirtIODevice *vdev, int n);
 void vring_disable_notification(VirtIODevice *vdev, Vring *vring);
 bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
 bool vring_should_notify(VirtIODevice *vdev, Vring *vring);
-int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement **elem);
+int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem);
 void vring_push(Vring *vring, VirtQueueElement *elem, int len);
 void vring_free_element(VirtQueueElement *elem);
 #endif /* VRING_H */
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@ -144,7 +144,7 @@ typedef struct MultiReqBuffer {
 typedef struct VirtIOBlockReq {
    VirtIOBlock *dev;
-    VirtQueueElement *elem;
+    VirtQueueElement elem;
    struct virtio_blk_inhdr *in;
    struct virtio_blk_outhdr out;
    QEMUIOVector qiov;
@ -152,6 +152,10 @@ typedef struct VirtIOBlockReq {
    BlockAcctCookie acct;
 } VirtIOBlockReq;
 VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s);
 void virtio_blk_free_request(VirtIOBlockReq *req);
 int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
                               VirtQueueElement *elem);
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@ -329,6 +329,7 @@ size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
                         int fillc, size_t bytes);
 ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b);
 void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf);
 void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes);
 bool buffer_is_zero(const void *buf, size_t len);
--- a/iothread.c
+++ b/iothread.c
@ -30,6 +30,7 @@ typedef ObjectClass IOThreadClass;
 static void *iothread_run(void *opaque)
 {
    IOThread *iothread = opaque;
    bool blocking;
    qemu_mutex_lock(&iothread->init_done_lock);
    iothread->thread_id = qemu_get_thread_id();
@ -38,8 +39,10 @@ static void *iothread_run(void *opaque)
    while (!iothread->stopping) {
        aio_context_acquire(iothread->ctx);
-        while (!iothread->stopping && aio_poll(iothread->ctx, true)) {
+        blocking = true;
        while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) {
            /* Progress was made, keep going */
            blocking = false;
        }
        aio_context_release(iothread->ctx);
    }
--- a/main-loop.c
+++ b/main-loop.c
@ -498,24 +498,3 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
 {
    return aio_bh_new(qemu_aio_context, cb, opaque);
 }
 bool qemu_aio_wait(void)
 {
    return aio_poll(qemu_aio_context, true);
 }
 #ifdef CONFIG_POSIX
 void qemu_aio_set_fd_handler(int fd,
                             IOHandler *io_read,
                             IOHandler *io_write,
                             void *opaque)
 {
    aio_set_fd_handler(qemu_aio_context, fd, io_read, io_write, opaque);
 }
 #endif
 void qemu_aio_set_event_notifier(EventNotifier *notifier,
                                 EventNotifierHandler *io_read)
 {
    aio_set_event_notifier(qemu_aio_context, notifier, io_read);
 }
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@ -483,7 +483,7 @@ static int do_co_write_zeroes(BlockDriverState *bs, int64_t offset, int count,
    co = qemu_coroutine_create(co_write_zeroes_entry);
    qemu_coroutine_enter(co, &data);
    while (!data.done) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(bs), true);
    }
    if (data.ret < 0) {
        return data.ret;
@ -2027,7 +2027,7 @@ static const cmdinfo_t resume_cmd = {
 static int wait_break_f(BlockDriverState *bs, int argc, char **argv)
 {
    while (!bdrv_debug_is_suspended(bs, argv[1])) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(bs), true);
    }
    return 0;
--- a/tests/qemu-iotests/028
+++ b/tests/qemu-iotests/028
@ -33,7 +33,8 @@ status=1	# failure is the default!
 _cleanup()
 {
-	_cleanup_test_img
+    rm -f "${TEST_IMG}.copy"
    _cleanup_test_img
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
@ -41,6 +42,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 . ./common.rc
 . ./common.filter
 . ./common.pattern
 . ./common.qemu
 # Any format supporting backing files except vmdk and qcow which do not support
 # smaller backing files.
@ -99,6 +101,29 @@ _check_test_img
 # Rebase it on top of its base image
 $QEMU_IMG rebase -b "$TEST_IMG.base" "$TEST_IMG"
 echo
 echo block-backup
 echo
 qemu_comm_method="monitor"
 _launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk
 h=$QEMU_HANDLE
 QEMU_COMM_TIMEOUT=1
 _send_qemu_cmd $h "drive_backup disk ${TEST_IMG}.copy" "(qemu)"
 qemu_cmd_repeat=20 _send_qemu_cmd $h "info block-jobs" "No active jobs"
 _send_qemu_cmd $h 'quit' ""
 # Base image sectors
 TEST_IMG="${TEST_IMG}.copy" io readv $(( offset )) 512 1024 32
 # Image sectors
 TEST_IMG="${TEST_IMG}.copy" io readv $(( offset + 512 )) 512 1024 64
 # Zero sectors beyond end of base image
 TEST_IMG="${TEST_IMG}.copy" io_zero readv $(( offset + 32 * 1024 )) 512 1024 32
 _check_test_img
 # success, all done
--- a/tests/qemu-iotests/028.out
+++ b/tests/qemu-iotests/028.out
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@ -24,14 +24,6 @@ typedef struct {
    bool auto_set;
 } EventNotifierTestData;
 /* Wait until there are no more BHs or AIO requests */
 static void wait_for_aio(void)
 {
    while (aio_poll(ctx, true)) {
        /* Do nothing */
    }
 }
 /* Wait until event notifier becomes inactive */
 static void wait_until_inactive(EventNotifierTestData *data)
 {
@ -204,7 +196,9 @@ static void test_bh_schedule10(void)
    g_assert(aio_poll(ctx, true));
    g_assert_cmpint(data.n, ==, 2);
-    wait_for_aio();
+    while (data.n < 10) {
        aio_poll(ctx, true);
    }
    g_assert_cmpint(data.n, ==, 10);
    g_assert(!aio_poll(ctx, false));
@ -252,7 +246,9 @@ static void test_bh_delete_from_cb(void)
    qemu_bh_schedule(data1.bh);
    g_assert_cmpint(data1.n, ==, 0);
-    wait_for_aio();
+    while (data1.n < data1.max) {
        aio_poll(ctx, true);
    }
    g_assert_cmpint(data1.n, ==, data1.max);
    g_assert(data1.bh == NULL);
@ -287,7 +283,12 @@ static void test_bh_delete_from_cb_many(void)
    g_assert_cmpint(data4.n, ==, 1);
    g_assert(data1.bh == NULL);
-    wait_for_aio();
+    while (data1.n < data1.max ||
           data2.n < data2.max ||
           data3.n < data3.max ||
           data4.n < data4.max) {
        aio_poll(ctx, true);
    }
    g_assert_cmpint(data1.n, ==, data1.max);
    g_assert_cmpint(data2.n, ==, data2.max);
    g_assert_cmpint(data3.n, ==, data3.max);
@ -306,7 +307,7 @@ static void test_bh_flush(void)
    qemu_bh_schedule(data.bh);
    g_assert_cmpint(data.n, ==, 0);
-    wait_for_aio();
+    g_assert(aio_poll(ctx, true));
    g_assert_cmpint(data.n, ==, 1);
    g_assert(!aio_poll(ctx, false));
@ -806,17 +807,16 @@ static void test_source_timer_schedule(void)
    g_usleep(1 * G_USEC_PER_SEC);
    g_assert_cmpint(data.n, ==, 0);
-    g_assert(g_main_context_iteration(NULL, false));
+    g_assert(g_main_context_iteration(NULL, true));
    g_assert_cmpint(data.n, ==, 1);
    expiry += data.ns;
-    /* The comment above was not kidding when it said this wakes up itself */
+    while (data.n < 2) {
-    do {
+        g_main_context_iteration(NULL, true);
-        g_assert(g_main_context_iteration(NULL, true));
+    }
    } while (qemu_clock_get_ns(data.clock_type) <= expiry);
    g_usleep(1 * G_USEC_PER_SEC);
    g_main_context_iteration(NULL, false);
    g_assert_cmpint(data.n, ==, 2);
    g_assert(qemu_clock_get_ns(data.clock_type) > expiry);
    aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL);
    close(pipefd[0]);
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@ -83,7 +83,7 @@ static void co_test_cb(void *opaque)
    data->ret = 0;
    active--;
-    /* The test continues in test_submit_co, after qemu_aio_wait_all... */
+    /* The test continues in test_submit_co, after aio_poll... */
 }
 static void test_submit_co(void)
@ -98,7 +98,7 @@ static void test_submit_co(void)
    g_assert_cmpint(active, ==, 1);
    g_assert_cmpint(data.ret, ==, -EINPROGRESS);
-    /* qemu_aio_wait_all will execute the rest of the coroutine.  */
+    /* aio_poll will execute the rest of the coroutine.  */
    while (data.ret == -EINPROGRESS) {
        aio_poll(ctx, true);
--- a/util/iov.c
+++ b/util/iov.c
@ -550,3 +550,16 @@ size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
    return total;
 }
 void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes)
 {
    size_t total;
    unsigned int niov = qiov->niov;
    assert(qiov->size >= bytes);
    total = iov_discard_back(qiov->iov, &niov, bytes);
    assert(total == bytes);
    qiov->niov = niov;
    qiov->size -= bytes;
 }