Block patches for 2.1.0-rc2 (v2)

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQIcBAABAgAGBQJTw6scAAoJEH8JsnLIjy/WWk8QAMmThVQhJqajBbriVJWfB5w4
 A4ZaRpk+NsDUZhzbsVBJ06ZdlQSX76JOT17V1hDFe7wcu1Aq8XL2b0PzV4iDNRgJ
 QPjW7vKAfFq/ANziGfgfAhpeQkq8o5+R8OepmdqJXzIDCjg+nDtJVdsc7G8DIWOx
 1ssEqW8zaRuZMck61mo3hpdCCA+m+3HTHhcDEf/lzS69XHi2+BZ6ATVh3zShJqxy
 +68hDHdPNMTWESkN5MBQAo82flbdmNjqpD5SYYuJsoCOV+Tb5jLUGbED/VM1LqCf
 8ukXlu4TO0u3ZmO+3XeQJiNsBSQEGOp9/9gOYj8J7AaZcZzC4cON3RmiYLZLgapj
 zdKIvgCxuFzyFheJYPCwpr3483w6/mh4uMzasZ+jHETqieAyvy1L860FNdWQaXoX
 K96m/1yIaQ2NOogcWrxZZ4Jt/diKh+NWynBFm8MZON6MK46FLiCcma2ZedoX6dNc
 R+Ul0qiYMo5B9fX05uhf15dU8cmVQuVFRo2ftIIqxZDY9IPjJjrJPw9EjHajGIJb
 MpU25NRHCdf0BscgYufuf1W9llasl0fbAd3SIA4FccTFdAeDwu5SQXHTodhu64hh
 7gf23N7Let/Gnucxx7gOTi1Jz3uR8V7MbIYRgBBvyRqhNRRRjYDwpg2c8guCzM2F
 VObGeTRdNa9QhTXqBPdQ
 =YxHy
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block patches for 2.1.0-rc2 (v2)

# gpg: Signature made Mon 14 Jul 2014 11:04:12 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (22 commits)
  ide: Treat read/write beyond end as invalid
  virtio-blk: Treat read/write beyond end as invalid
  virtio-blk: Bypass error action and I/O accounting on invalid r/w
  virtio-blk: Factor common checks out of virtio_blk_handle_read/write()
  dma-helpers: Fix too long qiov
  qtest: fix vhost-user-test compilation with old GLib
  tests: Fix unterminated string output visitor enum human string
  AioContext: do not rely on aio_poll(ctx, true) result to end a loop
  virtio-blk: embed VirtQueueElement in VirtIOBlockReq
  virtio-blk: avoid g_slice_new0() for VirtIOBlockReq and VirtQueueElement
  dataplane: do not free VirtQueueElement in vring_push()
  virtio-blk: avoid dataplane VirtIOBlockReq early free
  block: Assert qiov length matches request length
  qed: Make qiov match request size until backing file EOF
  qcow2: Make qiov match request size until backing file EOF
  block: Make qiov match the request size until EOF
  AioContext: speed up aio_notify
  test-aio: fix GSource-based timer test
  block: drop aio functions that operate on the main AioContext
  block: prefer aio_poll to qemu_aio_wait
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2014-07-14 13:09:29 +01:00
commit 7a6d04e73f
30 changed files with 699 additions and 167 deletions

View File

@ -125,7 +125,7 @@ static bool aio_dispatch(AioContext *ctx)
bool progress = false; bool progress = false;
/* /*
* We have to walk very carefully in case qemu_aio_set_fd_handler is * We have to walk very carefully in case aio_set_fd_handler is
* called while we're walking. * called while we're walking.
*/ */
node = QLIST_FIRST(&ctx->aio_handlers); node = QLIST_FIRST(&ctx->aio_handlers);
@ -175,27 +175,56 @@ static bool aio_dispatch(AioContext *ctx)
bool aio_poll(AioContext *ctx, bool blocking) bool aio_poll(AioContext *ctx, bool blocking)
{ {
AioHandler *node; AioHandler *node;
bool was_dispatching;
int ret; int ret;
bool progress; bool progress;
was_dispatching = ctx->dispatching;
progress = false; progress = false;
/* aio_notify can avoid the expensive event_notifier_set if
* everything (file descriptors, bottom halves, timers) will
* be re-evaluated before the next blocking poll(). This happens
* in two cases:
*
* 1) when aio_poll is called with blocking == false
*
* 2) when we are called after poll(). If we are called before
* poll(), bottom halves will not be re-evaluated and we need
* aio_notify() if blocking == true.
*
* The first aio_dispatch() only does something when AioContext is
* running as a GSource, and in that case aio_poll is used only
* with blocking == false, so this optimization is already quite
* effective. However, the code is ugly and should be restructured
* to have a single aio_dispatch() call. To do this, we need to
* reorganize aio_poll into a prepare/poll/dispatch model like
* glib's.
*
* If we're in a nested event loop, ctx->dispatching might be true.
* In that case we can restore it just before returning, but we
* have to clear it now.
*/
aio_set_dispatching(ctx, !blocking);
/* /*
* If there are callbacks left that have been queued, we need to call them. * If there are callbacks left that have been queued, we need to call them.
* Do not call select in this case, because it is possible that the caller * Do not call select in this case, because it is possible that the caller
* does not need a complete flush (as is the case for qemu_aio_wait loops). * does not need a complete flush (as is the case for aio_poll loops).
*/ */
if (aio_bh_poll(ctx)) { if (aio_bh_poll(ctx)) {
blocking = false; blocking = false;
progress = true; progress = true;
} }
/* Re-evaluate condition (1) above. */
aio_set_dispatching(ctx, !blocking);
if (aio_dispatch(ctx)) { if (aio_dispatch(ctx)) {
progress = true; progress = true;
} }
if (progress && !blocking) { if (progress && !blocking) {
return true; goto out;
} }
ctx->walking_handlers++; ctx->walking_handlers++;
@ -234,9 +263,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
} }
/* Run dispatch even if there were no readable fds to run timers */ /* Run dispatch even if there were no readable fds to run timers */
aio_set_dispatching(ctx, true);
if (aio_dispatch(ctx)) { if (aio_dispatch(ctx)) {
progress = true; progress = true;
} }
out:
aio_set_dispatching(ctx, was_dispatching);
return progress; return progress;
} }

View File

@ -102,7 +102,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
/* /*
* If there are callbacks left that have been queued, we need to call them. * If there are callbacks left that have been queued, we need to call them.
* Do not call select in this case, because it is possible that the caller * Do not call select in this case, because it is possible that the caller
* does not need a complete flush (as is the case for qemu_aio_wait loops). * does not need a complete flush (as is the case for aio_poll loops).
*/ */
if (aio_bh_poll(ctx)) { if (aio_bh_poll(ctx)) {
blocking = false; blocking = false;
@ -115,7 +115,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
/* /*
* Then dispatch any pending callbacks from the GSource. * Then dispatch any pending callbacks from the GSource.
* *
* We have to walk very carefully in case qemu_aio_set_fd_handler is * We have to walk very carefully in case aio_set_fd_handler is
* called while we're walking. * called while we're walking.
*/ */
node = QLIST_FIRST(&ctx->aio_handlers); node = QLIST_FIRST(&ctx->aio_handlers);
@ -177,7 +177,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
blocking = false; blocking = false;
/* we have to walk very carefully in case /* we have to walk very carefully in case
* qemu_aio_set_fd_handler is called while we're walking */ * aio_set_fd_handler is called while we're walking */
node = QLIST_FIRST(&ctx->aio_handlers); node = QLIST_FIRST(&ctx->aio_handlers);
while (node) { while (node) {
AioHandler *tmp; AioHandler *tmp;

19
async.c
View File

@ -26,6 +26,7 @@
#include "block/aio.h" #include "block/aio.h"
#include "block/thread-pool.h" #include "block/thread-pool.h"
#include "qemu/main-loop.h" #include "qemu/main-loop.h"
#include "qemu/atomic.h"
/***********************************************************/ /***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */ /* bottom halves (can be seen as timers which expire ASAP) */
@ -247,9 +248,25 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx)
return ctx->thread_pool; return ctx->thread_pool;
} }
/*
 * Set ctx->dispatching to @dispatching.
 *
 * aio_notify() reads this flag and skips event_notifier_set() while it
 * is true.  When the flag is being cleared, the store is followed by a
 * full memory barrier; when it is being set, no barrier is issued.
 */
void aio_set_dispatching(AioContext *ctx, bool dispatching)
{
ctx->dispatching = dispatching;
if (!dispatching) {
/* Write ctx->dispatching before reading e.g. bh->scheduled.
* Optimization: this is only needed when we're entering the "unsafe"
* phase where other threads must call event_notifier_set.
*/
smp_mb();
}
}
void aio_notify(AioContext *ctx) void aio_notify(AioContext *ctx)
{ {
event_notifier_set(&ctx->notifier); /* Write e.g. bh->scheduled before reading ctx->dispatching. */
smp_mb();
if (!ctx->dispatching) {
event_notifier_set(&ctx->notifier);
}
} }
static void aio_timerlist_notify(void *opaque) static void aio_timerlist_notify(void *opaque)

22
block.c
View File

@ -471,7 +471,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
co = qemu_coroutine_create(bdrv_create_co_entry); co = qemu_coroutine_create(bdrv_create_co_entry);
qemu_coroutine_enter(co, &cco); qemu_coroutine_enter(co, &cco);
while (cco.ret == NOT_DONE) { while (cco.ret == NOT_DONE) {
qemu_aio_wait(); aio_poll(qemu_get_aio_context(), true);
} }
} }
@ -3010,6 +3010,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
/* Handle Copy on Read and associated serialisation */ /* Handle Copy on Read and associated serialisation */
if (flags & BDRV_REQ_COPY_ON_READ) { if (flags & BDRV_REQ_COPY_ON_READ) {
@ -3054,8 +3055,20 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
align >> BDRV_SECTOR_BITS); align >> BDRV_SECTOR_BITS);
if (max_nb_sectors > 0) { if (max_nb_sectors > 0) {
ret = drv->bdrv_co_readv(bs, sector_num, QEMUIOVector local_qiov;
MIN(nb_sectors, max_nb_sectors), qiov); size_t local_sectors;
max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
local_sectors = MIN(max_nb_sectors, nb_sectors);
qemu_iovec_init(&local_qiov, qiov->niov);
qemu_iovec_concat(&local_qiov, qiov, 0,
local_sectors * BDRV_SECTOR_SIZE);
ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
&local_qiov);
qemu_iovec_destroy(&local_qiov);
} else { } else {
ret = 0; ret = 0;
} }
@ -3267,6 +3280,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
waited = wait_serialising_requests(req); waited = wait_serialising_requests(req);
assert(!waited || !req->serialising); assert(!waited || !req->serialising);
@ -4040,7 +4054,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
return (ret & BDRV_BLOCK_ALLOCATED); return !!(ret & BDRV_BLOCK_ALLOCATED);
} }
/* /*

View File

@ -307,7 +307,7 @@ static void coroutine_fn backup_run(void *opaque)
BACKUP_SECTORS_PER_CLUSTER - i, &n); BACKUP_SECTORS_PER_CLUSTER - i, &n);
i += n; i += n;
if (alloced == 1) { if (alloced == 1 || n == 0) {
break; break;
} }
} }

View File

@ -1020,11 +1020,20 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov, n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
sector_num, cur_nr_sectors); sector_num, cur_nr_sectors);
if (n1 > 0) { if (n1 > 0) {
QEMUIOVector local_qiov;
qemu_iovec_init(&local_qiov, hd_qiov.niov);
qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
n1 * BDRV_SECTOR_SIZE);
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock); qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->backing_hd, sector_num, ret = bdrv_co_readv(bs->backing_hd, sector_num,
n1, &hd_qiov); n1, &local_qiov);
qemu_co_mutex_lock(&s->lock); qemu_co_mutex_lock(&s->lock);
qemu_iovec_destroy(&local_qiov);
if (ret < 0) { if (ret < 0) {
goto fail; goto fail;
} }

View File

@ -761,17 +761,19 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
/** /**
* Read from the backing file or zero-fill if no backing file * Read from the backing file or zero-fill if no backing file
* *
* @s: QED state * @s: QED state
* @pos: Byte position in device * @pos: Byte position in device
* @qiov: Destination I/O vector * @qiov: Destination I/O vector
* @cb: Completion function * @backing_qiov: Possibly shortened copy of qiov, to be allocated here
* @opaque: User data for completion function * @cb: Completion function
* @opaque: User data for completion function
* *
* This function reads qiov->size bytes starting at pos from the backing file. * This function reads qiov->size bytes starting at pos from the backing file.
* If there is no backing file then zeroes are read. * If there is no backing file then zeroes are read.
*/ */
static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
QEMUIOVector *qiov, QEMUIOVector *qiov,
QEMUIOVector **backing_qiov,
BlockDriverCompletionFunc *cb, void *opaque) BlockDriverCompletionFunc *cb, void *opaque)
{ {
uint64_t backing_length = 0; uint64_t backing_length = 0;
@ -804,15 +806,21 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
/* If the read straddles the end of the backing file, shorten it */ /* If the read straddles the end of the backing file, shorten it */
size = MIN((uint64_t)backing_length - pos, qiov->size); size = MIN((uint64_t)backing_length - pos, qiov->size);
assert(*backing_qiov == NULL);
*backing_qiov = g_new(QEMUIOVector, 1);
qemu_iovec_init(*backing_qiov, qiov->niov);
qemu_iovec_concat(*backing_qiov, qiov, 0, size);
BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE, bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
qiov, size / BDRV_SECTOR_SIZE, cb, opaque); *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
} }
typedef struct { typedef struct {
GenericCB gencb; GenericCB gencb;
BDRVQEDState *s; BDRVQEDState *s;
QEMUIOVector qiov; QEMUIOVector qiov;
QEMUIOVector *backing_qiov;
struct iovec iov; struct iovec iov;
uint64_t offset; uint64_t offset;
} CopyFromBackingFileCB; } CopyFromBackingFileCB;
@ -829,6 +837,12 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
CopyFromBackingFileCB *copy_cb = opaque; CopyFromBackingFileCB *copy_cb = opaque;
BDRVQEDState *s = copy_cb->s; BDRVQEDState *s = copy_cb->s;
if (copy_cb->backing_qiov) {
qemu_iovec_destroy(copy_cb->backing_qiov);
g_free(copy_cb->backing_qiov);
copy_cb->backing_qiov = NULL;
}
if (ret) { if (ret) {
qed_copy_from_backing_file_cb(copy_cb, ret); qed_copy_from_backing_file_cb(copy_cb, ret);
return; return;
@ -866,11 +880,12 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque); copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
copy_cb->s = s; copy_cb->s = s;
copy_cb->offset = offset; copy_cb->offset = offset;
copy_cb->backing_qiov = NULL;
copy_cb->iov.iov_base = qemu_blockalign(s->bs, len); copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
copy_cb->iov.iov_len = len; copy_cb->iov.iov_len = len;
qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1); qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
qed_read_backing_file(s, pos, &copy_cb->qiov, qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov,
qed_copy_from_backing_file_write, copy_cb); qed_copy_from_backing_file_write, copy_cb);
} }
@ -1313,7 +1328,7 @@ static void qed_aio_read_data(void *opaque, int ret,
return; return;
} else if (ret != QED_CLUSTER_FOUND) { } else if (ret != QED_CLUSTER_FOUND) {
qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
qed_aio_next_io, acb); &acb->backing_qiov, qed_aio_next_io, acb);
return; return;
} }
@ -1339,6 +1354,12 @@ static void qed_aio_next_io(void *opaque, int ret)
trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
if (acb->backing_qiov) {
qemu_iovec_destroy(acb->backing_qiov);
g_free(acb->backing_qiov);
acb->backing_qiov = NULL;
}
/* Handle I/O error */ /* Handle I/O error */
if (ret) { if (ret) {
qed_aio_complete(acb, ret); qed_aio_complete(acb, ret);
@ -1378,6 +1399,7 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
acb->qiov_offset = 0; acb->qiov_offset = 0;
acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE; acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
acb->backing_qiov = NULL;
acb->request.l2_table = NULL; acb->request.l2_table = NULL;
qemu_iovec_init(&acb->cur_qiov, qiov->niov); qemu_iovec_init(&acb->cur_qiov, qiov->niov);

View File

@ -142,6 +142,7 @@ typedef struct QEDAIOCB {
/* Current cluster scatter-gather list */ /* Current cluster scatter-gather list */
QEMUIOVector cur_qiov; QEMUIOVector cur_qiov;
QEMUIOVector *backing_qiov;
uint64_t cur_pos; /* position on block device, in bytes */ uint64_t cur_pos; /* position on block device, in bytes */
uint64_t cur_cluster; /* cluster offset in image file */ uint64_t cur_cluster; /* cluster offset in image file */
unsigned int cur_nclusters; /* number of clusters being accessed */ unsigned int cur_nclusters; /* number of clusters being accessed */

View File

@ -790,6 +790,7 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len); memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
p += aiocb->aio_iov[i].iov_len; p += aiocb->aio_iov[i].iov_len;
} }
assert(p - buf == aiocb->aio_nbytes);
} }
nbytes = handle_aiocb_rw_linear(aiocb, buf); nbytes = handle_aiocb_rw_linear(aiocb, buf);
@ -804,9 +805,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
copy = aiocb->aio_iov[i].iov_len; copy = aiocb->aio_iov[i].iov_len;
} }
memcpy(aiocb->aio_iov[i].iov_base, p, copy); memcpy(aiocb->aio_iov[i].iov_base, p, copy);
assert(count >= copy);
p += copy; p += copy;
count -= copy; count -= copy;
} }
assert(count == 0);
} }
qemu_vfree(buf); qemu_vfree(buf);
@ -993,12 +996,14 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
acb->aio_type = type; acb->aio_type = type;
acb->aio_fildes = fd; acb->aio_fildes = fd;
acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
if (qiov) { if (qiov) {
acb->aio_iov = qiov->iov; acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov; acb->aio_niov = qiov->niov;
assert(qiov->size == acb->aio_nbytes);
} }
acb->aio_nbytes = nb_sectors * 512;
acb->aio_offset = sector_num * 512;
trace_paio_submit_co(sector_num, nb_sectors, type); trace_paio_submit_co(sector_num, nb_sectors, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
@ -1016,12 +1021,14 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
acb->aio_type = type; acb->aio_type = type;
acb->aio_fildes = fd; acb->aio_fildes = fd;
acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
if (qiov) { if (qiov) {
acb->aio_iov = qiov->iov; acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov; acb->aio_niov = qiov->niov;
assert(qiov->size == acb->aio_nbytes);
} }
acb->aio_nbytes = nb_sectors * 512;
acb->aio_offset = sector_num * 512;
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type); trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); pool = aio_get_thread_pool(bdrv_get_aio_context(bs));

View File

@ -187,7 +187,7 @@ int block_job_cancel_sync(BlockJob *job)
job->opaque = &data; job->opaque = &data;
block_job_cancel(job); block_job_cancel(job);
while (data.ret == -EINPROGRESS) { while (data.ret == -EINPROGRESS) {
qemu_aio_wait(); aio_poll(bdrv_get_aio_context(bs), true);
} }
return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
} }

View File

@ -170,6 +170,10 @@ static void dma_bdrv_cb(void *opaque, int ret)
return; return;
} }
if (dbs->iov.size & ~BDRV_SECTOR_MASK) {
qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
}
dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov, dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
dbs->iov.size / 512, dma_bdrv_cb, dbs); dbs->iov.size / 512, dma_bdrv_cb, dbs);
assert(dbs->acb); assert(dbs->acb);

104
docs/aio_notify.promela Normal file
View File

@ -0,0 +1,104 @@
/*
* This model describes the interaction between aio_set_dispatching()
* and aio_notify().
*
* Author: Paolo Bonzini <pbonzini@redhat.com>
*
* This file is in the public domain. If you really want a license,
* the WTFPL will do.
*
* To simulate it:
* spin -p docs/aio_notify.promela
*
* To verify it:
* spin -a docs/aio_notify.promela
* gcc -O2 pan.c
* ./a.out -a
*/
// Number of requests the notifier generates (one bit per request)
#define MAX 4
// Bit of the last request generated by the notifier
#define LAST (1 << (MAX - 1))
// Value of "done" once all MAX request bits have been collected
#define FINAL ((LAST << 1) - 1)
// Set by the waiter around its dispatch phase; while it is set the
// notifier does not set "event"
bool dispatching;
// Notification flag: set by the notifier, cleared by the waiter
bool event;
// req: bits of requests not yet fetched by the waiter
// done: bits of requests the waiter has already fetched
int req, done;
// Models the event-loop side.  Until every request bit has been
// collected in "done", each iteration decides whether it would block,
// fetches and clears the pending requests, waits for "event" if it
// decided to block, and then brackets an (empty) dispatch phase by
// setting and clearing "dispatching".
active proctype waiter()
{
int fetch, blocking;
do
:: done != FINAL -> {
// Computing "blocking" is separate from execution of the
// "bottom half"
blocking = (req == 0);
// This is our "bottom half"
atomic { fetch = req; req = 0; }
done = done | fetch;
// Wait for a nudge from the other side
// (when "blocking" is set, the only way out of this loop is to
// observe and clear "event")
do
:: event == 1 -> { event = 0; break; }
:: !blocking -> break;
od;
// While "dispatching" is 1 the notifier skips setting "event"
dispatching = 1;
// If you are simulating this model, you may want to add
// something like this here:
//
// int foo; foo++; foo++; foo++;
//
// This only wastes some time and makes it more likely
// that the notifier process hits the "fast path".
dispatching = 0;
}
:: else -> break;
od
}
// Models a thread that submits work.  It generates MAX requests, one
// per bit from 1 up to LAST, and after each one performs the
// aio_notify step: "event" is set only if "dispatching" is 0.
// "sets" counts how many times "event" was actually set; the
// difference from MAX is printed at the end.
active proctype notifier()
{
int next = 1;
int sets = 0;
do
:: next <= LAST -> {
// generate a request
req = req | next;
next = next << 1;
// aio_notify
if
:: dispatching == 0 -> sets++; event = 1;
:: else -> skip;
fi;
// Test both synchronous and asynchronous delivery
// (nondeterministic choice: either wait until the waiter has
// cleared "req", or continue immediately)
if
:: 1 -> do
:: req == 0 -> break;
od;
:: 1 -> skip;
fi;
}
:: else -> break;
od;
printf("Skipped %d event_notifier_set\n", MAX - sets);
}
// p holds once the waiter has collected every request bit
#define p (done == FINAL)
// Never claim describing the forbidden behavior: p becomes true at
// some point and is later observed to be false.
never {
do
:: 1 // after an arbitrarily long prefix
:: p -> break // p becomes true
od;
do
:: !p -> accept: break // it must then remain true forever after
od
}

View File

@ -65,43 +65,41 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
{ {
stb_p(&req->in->status, status); stb_p(&req->in->status, status);
vring_push(&req->dev->dataplane->vring, req->elem, vring_push(&req->dev->dataplane->vring, &req->elem,
req->qiov.size + sizeof(*req->in)); req->qiov.size + sizeof(*req->in));
notify_guest(req->dev->dataplane); notify_guest(req->dev->dataplane);
g_slice_free(VirtIOBlockReq, req);
} }
static void handle_notify(EventNotifier *e) static void handle_notify(EventNotifier *e)
{ {
VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane, VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
host_notifier); host_notifier);
VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
VirtQueueElement *elem;
VirtIOBlockReq *req;
int ret;
MultiReqBuffer mrb = {
.num_writes = 0,
};
event_notifier_test_and_clear(&s->host_notifier); event_notifier_test_and_clear(&s->host_notifier);
bdrv_io_plug(s->blk->conf.bs); bdrv_io_plug(s->blk->conf.bs);
for (;;) { for (;;) {
MultiReqBuffer mrb = {
.num_writes = 0,
};
int ret;
/* Disable guest->host notifies to avoid unnecessary vmexits */ /* Disable guest->host notifies to avoid unnecessary vmexits */
vring_disable_notification(s->vdev, &s->vring); vring_disable_notification(s->vdev, &s->vring);
for (;;) { for (;;) {
ret = vring_pop(s->vdev, &s->vring, &elem); VirtIOBlockReq *req = virtio_blk_alloc_request(vblk);
ret = vring_pop(s->vdev, &s->vring, &req->elem);
if (ret < 0) { if (ret < 0) {
assert(elem == NULL); virtio_blk_free_request(req);
break; /* no more requests */ break; /* no more requests */
} }
trace_virtio_blk_data_plane_process_request(s, elem->out_num, trace_virtio_blk_data_plane_process_request(s, req->elem.out_num,
elem->in_num, elem->index); req->elem.in_num,
req->elem.index);
req = g_slice_new(VirtIOBlockReq);
req->dev = VIRTIO_BLK(s->vdev);
req->elem = elem;
virtio_blk_handle_request(req, &mrb); virtio_blk_handle_request(req, &mrb);
} }

View File

@ -29,18 +29,18 @@
#include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-access.h"
static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
{ {
VirtIOBlockReq *req = g_slice_new0(VirtIOBlockReq); VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
req->dev = s; req->dev = s;
req->elem = g_slice_new0(VirtQueueElement); req->qiov.size = 0;
req->next = NULL;
return req; return req;
} }
static void virtio_blk_free_request(VirtIOBlockReq *req) void virtio_blk_free_request(VirtIOBlockReq *req)
{ {
if (req) { if (req) {
g_slice_free(VirtQueueElement, req->elem);
g_slice_free(VirtIOBlockReq, req); g_slice_free(VirtIOBlockReq, req);
} }
} }
@ -54,7 +54,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
trace_virtio_blk_req_complete(req, status); trace_virtio_blk_req_complete(req, status);
stb_p(&req->in->status, status); stb_p(&req->in->status, status);
virtqueue_push(s->vq, req->elem, req->qiov.size + sizeof(*req->in)); virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
virtio_notify(vdev, s->vq); virtio_notify(vdev, s->vq);
} }
@ -119,7 +119,7 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{ {
VirtIOBlockReq *req = virtio_blk_alloc_request(s); VirtIOBlockReq *req = virtio_blk_alloc_request(s);
if (!virtqueue_pop(s->vq, req->elem)) { if (!virtqueue_pop(s->vq, &req->elem)) {
virtio_blk_free_request(req); virtio_blk_free_request(req);
return NULL; return NULL;
} }
@ -252,7 +252,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{ {
int status; int status;
status = virtio_blk_handle_scsi_req(req->dev, req->elem); status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
virtio_blk_req_complete(req, status); virtio_blk_req_complete(req, status);
virtio_blk_free_request(req); virtio_blk_free_request(req);
} }
@ -288,6 +288,25 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req); bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
} }
/*
 * Check whether a request starting at @sector and covering @size bytes
 * is acceptable for @dev.
 *
 * The request is rejected when @sector has any bit of dev->sector_mask
 * set, when @size is not a multiple of the configured logical block
 * size, or when the sector range does not fit within the sector count
 * reported by bdrv_get_geometry().
 *
 * Returns true if the request passes all checks, false otherwise.
 */
static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
                                     uint64_t sector, size_t size)
{
    uint64_t disk_sectors;
    uint64_t req_sectors;

    /* Alignment checks come first; the geometry is only queried after. */
    if ((sector & dev->sector_mask) != 0 ||
        (size % dev->conf->logical_block_size) != 0) {
        return false;
    }

    bdrv_get_geometry(dev->bs, &disk_sectors);
    if (sector > disk_sectors) {
        return false;
    }

    /* sector <= disk_sectors here, so the subtraction cannot wrap. */
    req_sectors = size >> BDRV_SECTOR_BITS;
    return req_sectors <= disk_sectors - sector;
}
static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb) static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{ {
BlockRequest *blkreq; BlockRequest *blkreq;
@ -295,19 +314,16 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector); sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512); trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
if (sector & req->dev->sector_mask) { if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
virtio_blk_rw_complete(req, -EIO); virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
return; virtio_blk_free_request(req);
}
if (req->qiov.size % req->dev->conf->logical_block_size) {
virtio_blk_rw_complete(req, -EIO);
return; return;
} }
bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
if (mrb->num_writes == 32) { if (mrb->num_writes == 32) {
virtio_submit_multiwrite(req->dev->bs, mrb); virtio_submit_multiwrite(req->dev->bs, mrb);
} }
@ -329,18 +345,15 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector); sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512); trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
if (sector & req->dev->sector_mask) { if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
virtio_blk_rw_complete(req, -EIO); virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
return; virtio_blk_free_request(req);
}
if (req->qiov.size % req->dev->conf->logical_block_size) {
virtio_blk_rw_complete(req, -EIO);
return; return;
} }
bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
bdrv_aio_readv(req->dev->bs, sector, &req->qiov, bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
req->qiov.size / BDRV_SECTOR_SIZE, req->qiov.size / BDRV_SECTOR_SIZE,
virtio_blk_rw_complete, req); virtio_blk_rw_complete, req);
@ -349,12 +362,12 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{ {
uint32_t type; uint32_t type;
struct iovec *in_iov = req->elem->in_sg; struct iovec *in_iov = req->elem.in_sg;
struct iovec *iov = req->elem->out_sg; struct iovec *iov = req->elem.out_sg;
unsigned in_num = req->elem->in_num; unsigned in_num = req->elem.in_num;
unsigned out_num = req->elem->out_num; unsigned out_num = req->elem.out_num;
if (req->elem->out_num < 1 || req->elem->in_num < 1) { if (req->elem.out_num < 1 || req->elem.in_num < 1) {
error_report("virtio-blk missing headers"); error_report("virtio-blk missing headers");
exit(1); exit(1);
} }
@ -391,19 +404,19 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
* NB: per existing s/n string convention the string is * NB: per existing s/n string convention the string is
* terminated by '\0' only when shorter than buffer. * terminated by '\0' only when shorter than buffer.
*/ */
strncpy(req->elem->in_sg[0].iov_base, strncpy(req->elem.in_sg[0].iov_base,
s->blk.serial ? s->blk.serial : "", s->blk.serial ? s->blk.serial : "",
MIN(req->elem->in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES)); MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
virtio_blk_free_request(req); virtio_blk_free_request(req);
} else if (type & VIRTIO_BLK_T_OUT) { } else if (type & VIRTIO_BLK_T_OUT) {
qemu_iovec_init_external(&req->qiov, &req->elem->out_sg[1], qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
req->elem->out_num - 1); req->elem.out_num - 1);
virtio_blk_handle_write(req, mrb); virtio_blk_handle_write(req, mrb);
} else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) { } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
/* VIRTIO_BLK_T_IN is 0, so we can't just & it. */ /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
qemu_iovec_init_external(&req->qiov, &req->elem->in_sg[0], qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
req->elem->in_num - 1); req->elem.in_num - 1);
virtio_blk_handle_read(req); virtio_blk_handle_read(req);
} else { } else {
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
@ -627,7 +640,7 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
while (req) { while (req) {
qemu_put_sbyte(f, 1); qemu_put_sbyte(f, 1);
qemu_put_buffer(f, (unsigned char *)req->elem, qemu_put_buffer(f, (unsigned char *)&req->elem,
sizeof(VirtQueueElement)); sizeof(VirtQueueElement));
req = req->next; req = req->next;
} }
@ -652,15 +665,15 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
while (qemu_get_sbyte(f)) { while (qemu_get_sbyte(f)) {
VirtIOBlockReq *req = virtio_blk_alloc_request(s); VirtIOBlockReq *req = virtio_blk_alloc_request(s);
qemu_get_buffer(f, (unsigned char *)req->elem, qemu_get_buffer(f, (unsigned char *)&req->elem,
sizeof(VirtQueueElement)); sizeof(VirtQueueElement));
req->next = s->rq; req->next = s->rq;
s->rq = req; s->rq = req;
virtqueue_map_sg(req->elem->in_sg, req->elem->in_addr, virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
req->elem->in_num, 1); req->elem.in_num, 1);
virtqueue_map_sg(req->elem->out_sg, req->elem->out_addr, virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
req->elem->out_num, 0); req->elem.out_num, 0);
} }
return 0; return 0;

View File

@ -499,6 +499,18 @@ static void ide_rw_error(IDEState *s) {
ide_set_irq(s->bus); ide_set_irq(s->bus);
} }
/*
 * Tell whether a transfer of @nb_sectors sectors starting at @sector lies
 * within the drive, judged against the sector count that
 * bdrv_get_geometry() stores for s->bs.
 *
 * Returns true when the whole range fits, false otherwise.
 */
static bool ide_sect_range_ok(IDEState *s,
                              uint64_t sector, uint64_t nb_sectors)
{
    uint64_t disk_sectors;

    bdrv_get_geometry(s->bs, &disk_sectors);

    /*
     * The length is compared against the room left after @sector rather
     * than forming sector + nb_sectors, which could wrap around in 64 bits.
     * The first test guarantees the subtraction cannot go below zero.
     */
    return sector <= disk_sectors && nb_sectors <= disk_sectors - sector;
}
static void ide_sector_read_cb(void *opaque, int ret) static void ide_sector_read_cb(void *opaque, int ret)
{ {
IDEState *s = opaque; IDEState *s = opaque;
@ -554,6 +566,11 @@ void ide_sector_read(IDEState *s)
printf("sector=%" PRId64 "\n", sector_num); printf("sector=%" PRId64 "\n", sector_num);
#endif #endif
if (!ide_sect_range_ok(s, sector_num, n)) {
ide_rw_error(s);
return;
}
s->iov.iov_base = s->io_buffer; s->iov.iov_base = s->io_buffer;
s->iov.iov_len = n * BDRV_SECTOR_SIZE; s->iov.iov_len = n * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&s->qiov, &s->iov, 1); qemu_iovec_init_external(&s->qiov, &s->iov, 1);
@ -671,6 +688,12 @@ void ide_dma_cb(void *opaque, int ret)
sector_num, n, s->dma_cmd); sector_num, n, s->dma_cmd);
#endif #endif
if (!ide_sect_range_ok(s, sector_num, n)) {
dma_buf_commit(s);
ide_dma_error(s);
return;
}
switch (s->dma_cmd) { switch (s->dma_cmd) {
case IDE_DMA_READ: case IDE_DMA_READ:
s->bus->dma->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num, s->bus->dma->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num,
@ -790,6 +813,11 @@ void ide_sector_write(IDEState *s)
n = s->req_nb_sectors; n = s->req_nb_sectors;
} }
if (!ide_sect_range_ok(s, sector_num, n)) {
ide_rw_error(s);
return;
}
s->iov.iov_base = s->io_buffer; s->iov.iov_base = s->io_buffer;
s->iov.iov_len = n * BDRV_SECTOR_SIZE; s->iov.iov_len = n * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&s->qiov, &s->iov, 1); qemu_iovec_init_external(&s->qiov, &s->iov, 1);

View File

@ -272,7 +272,7 @@ static int get_indirect(Vring *vring, VirtQueueElement *elem,
return 0; return 0;
} }
void vring_free_element(VirtQueueElement *elem) static void vring_unmap_element(VirtQueueElement *elem)
{ {
int i; int i;
@ -287,8 +287,6 @@ void vring_free_element(VirtQueueElement *elem)
for (i = 0; i < elem->in_num; i++) { for (i = 0; i < elem->in_num; i++) {
vring_unmap(elem->in_sg[i].iov_base, true); vring_unmap(elem->in_sg[i].iov_base, true);
} }
g_slice_free(VirtQueueElement, elem);
} }
/* This looks in the virtqueue and for the first available buffer, and converts /* This looks in the virtqueue and for the first available buffer, and converts
@ -303,14 +301,16 @@ void vring_free_element(VirtQueueElement *elem)
* Stolen from linux/drivers/vhost/vhost.c. * Stolen from linux/drivers/vhost/vhost.c.
*/ */
int vring_pop(VirtIODevice *vdev, Vring *vring, int vring_pop(VirtIODevice *vdev, Vring *vring,
VirtQueueElement **p_elem) VirtQueueElement *elem)
{ {
struct vring_desc desc; struct vring_desc desc;
unsigned int i, head, found = 0, num = vring->vr.num; unsigned int i, head, found = 0, num = vring->vr.num;
uint16_t avail_idx, last_avail_idx; uint16_t avail_idx, last_avail_idx;
VirtQueueElement *elem = NULL;
int ret; int ret;
/* Initialize elem so it can be safely unmapped */
elem->in_num = elem->out_num = 0;
/* If there was a fatal error then refuse operation */ /* If there was a fatal error then refuse operation */
if (vring->broken) { if (vring->broken) {
ret = -EFAULT; ret = -EFAULT;
@ -342,10 +342,8 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
* the index we've seen. */ * the index we've seen. */
head = vring->vr.avail->ring[last_avail_idx % num]; head = vring->vr.avail->ring[last_avail_idx % num];
elem = g_slice_new(VirtQueueElement);
elem->index = head; elem->index = head;
elem->in_num = elem->out_num = 0;
/* If their number is silly, that's an error. */ /* If their number is silly, that's an error. */
if (unlikely(head >= num)) { if (unlikely(head >= num)) {
error_report("Guest says index %u > %u is available", head, num); error_report("Guest says index %u > %u is available", head, num);
@ -393,7 +391,6 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
/* On success, increment avail index. */ /* On success, increment avail index. */
vring->last_avail_idx++; vring->last_avail_idx++;
*p_elem = elem;
return head; return head;
out: out:
@ -401,10 +398,7 @@ out:
if (ret == -EFAULT) { if (ret == -EFAULT) {
vring->broken = true; vring->broken = true;
} }
if (elem) { vring_unmap_element(elem);
vring_free_element(elem);
}
*p_elem = NULL;
return ret; return ret;
} }
@ -418,7 +412,7 @@ void vring_push(Vring *vring, VirtQueueElement *elem, int len)
unsigned int head = elem->index; unsigned int head = elem->index;
uint16_t new; uint16_t new;
vring_free_element(elem); vring_unmap_element(elem);
/* Don't touch vring if a fatal error occurred */ /* Don't touch vring if a fatal error occurred */
if (vring->broken) { if (vring->broken) {

View File

@ -60,8 +60,14 @@ struct AioContext {
*/ */
int walking_handlers; int walking_handlers;
/* Used to avoid unnecessary event_notifier_set calls in aio_notify.
* Writes protected by lock or BQL, reads are lockless.
*/
bool dispatching;
/* lock to protect between bh's adders and deleter */ /* lock to protect between bh's adders and deleter */
QemuMutex bh_lock; QemuMutex bh_lock;
/* Anchor of the list of Bottom Halves belonging to the context */ /* Anchor of the list of Bottom Halves belonging to the context */
struct QEMUBH *first_bh; struct QEMUBH *first_bh;
@ -83,6 +89,9 @@ struct AioContext {
QEMUTimerListGroup tlg; QEMUTimerListGroup tlg;
}; };
/* Used internally to synchronize aio_poll against qemu_bh_schedule. */
void aio_set_dispatching(AioContext *ctx, bool dispatching);
/** /**
* aio_context_new: Allocate a new AioContext. * aio_context_new: Allocate a new AioContext.
* *
@ -205,9 +214,9 @@ bool aio_pending(AioContext *ctx);
/* Progress in completing AIO work to occur. This can issue new pending /* Progress in completing AIO work to occur. This can issue new pending
* aio as a result of executing I/O completion or bh callbacks. * aio as a result of executing I/O completion or bh callbacks.
* *
* If there is no pending AIO operation or completion (bottom half), * Return whether any progress was made by executing AIO or bottom half
* return false. If there are pending AIO operations of bottom halves, * handlers. If @blocking == true, this should always be true except
* return true. * if someone called aio_notify.
* *
* If there are no pending bottom halves, but there are pending AIO * If there are no pending bottom halves, but there are pending AIO
* operations, it may not be possible to make any progress without * operations, it may not be possible to make any progress without
@ -220,7 +229,7 @@ bool aio_poll(AioContext *ctx, bool blocking);
#ifdef CONFIG_POSIX #ifdef CONFIG_POSIX
/* Register a file descriptor and associated callbacks. Behaves very similarly /* Register a file descriptor and associated callbacks. Behaves very similarly
* to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
* be invoked when using qemu_aio_wait(). * be invoked when using aio_poll().
* *
* Code that invokes AIO completion functions should rely on this function * Code that invokes AIO completion functions should rely on this function
* instead of qemu_set_fd_handler[2]. * instead of qemu_set_fd_handler[2].
@ -234,7 +243,7 @@ void aio_set_fd_handler(AioContext *ctx,
/* Register an event notifier and associated callbacks. Behaves very similarly /* Register an event notifier and associated callbacks. Behaves very similarly
* to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
* will be invoked when using qemu_aio_wait(). * will be invoked when using aio_poll().
* *
* Code that invokes AIO completion functions should rely on this function * Code that invokes AIO completion functions should rely on this function
* instead of event_notifier_set_handler. * instead of event_notifier_set_handler.
@ -251,19 +260,6 @@ GSource *aio_get_g_source(AioContext *ctx);
/* Return the ThreadPool bound to this AioContext */ /* Return the ThreadPool bound to this AioContext */
struct ThreadPool *aio_get_thread_pool(AioContext *ctx); struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
/* Functions to operate on the main QEMU AioContext. */
bool qemu_aio_wait(void);
void qemu_aio_set_event_notifier(EventNotifier *notifier,
EventNotifierHandler *io_read);
#ifdef CONFIG_POSIX
void qemu_aio_set_fd_handler(int fd,
IOHandler *io_read,
IOHandler *io_write,
void *opaque);
#endif
/** /**
* aio_timer_new: * aio_timer_new:
* @ctx: the aio context * @ctx: the aio context

View File

@ -74,7 +74,7 @@ struct BlockJob {
* Set to true if the job should cancel itself. The flag must * Set to true if the job should cancel itself. The flag must
* always be tested just before toggling the busy flag from false * always be tested just before toggling the busy flag from false
* to true. After a job has been cancelled, it should only yield * to true. After a job has been cancelled, it should only yield
* if #qemu_aio_wait will ("sooner or later") reenter the coroutine. * if #aio_poll will ("sooner or later") reenter the coroutine.
*/ */
bool cancelled; bool cancelled;
@ -87,7 +87,7 @@ struct BlockJob {
/** /**
* Set to false by the job while it is in a quiescent state, where * Set to false by the job while it is in a quiescent state, where
* no I/O is pending and the job has yielded on any condition * no I/O is pending and the job has yielded on any condition
* that is not detected by #qemu_aio_wait, such as a timer. * that is not detected by #aio_poll, such as a timer.
*/ */
bool busy; bool busy;

View File

@ -212,7 +212,7 @@ void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns);
* Yield the coroutine for a given duration * Yield the coroutine for a given duration
* *
* Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
* resumed when using qemu_aio_wait(). * resumed when using aio_poll().
*/ */
void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
int64_t ns); int64_t ns);

View File

@ -53,8 +53,7 @@ void vring_teardown(Vring *vring, VirtIODevice *vdev, int n);
void vring_disable_notification(VirtIODevice *vdev, Vring *vring); void vring_disable_notification(VirtIODevice *vdev, Vring *vring);
bool vring_enable_notification(VirtIODevice *vdev, Vring *vring); bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
bool vring_should_notify(VirtIODevice *vdev, Vring *vring); bool vring_should_notify(VirtIODevice *vdev, Vring *vring);
int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement **elem); int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem);
void vring_push(Vring *vring, VirtQueueElement *elem, int len); void vring_push(Vring *vring, VirtQueueElement *elem, int len);
void vring_free_element(VirtQueueElement *elem);
#endif /* VRING_H */ #endif /* VRING_H */

View File

@ -144,7 +144,7 @@ typedef struct MultiReqBuffer {
typedef struct VirtIOBlockReq { typedef struct VirtIOBlockReq {
VirtIOBlock *dev; VirtIOBlock *dev;
VirtQueueElement *elem; VirtQueueElement elem;
struct virtio_blk_inhdr *in; struct virtio_blk_inhdr *in;
struct virtio_blk_outhdr out; struct virtio_blk_outhdr out;
QEMUIOVector qiov; QEMUIOVector qiov;
@ -152,6 +152,10 @@ typedef struct VirtIOBlockReq {
BlockAcctCookie acct; BlockAcctCookie acct;
} VirtIOBlockReq; } VirtIOBlockReq;
VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s);
void virtio_blk_free_request(VirtIOBlockReq *req);
int virtio_blk_handle_scsi_req(VirtIOBlock *blk, int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
VirtQueueElement *elem); VirtQueueElement *elem);

View File

@ -329,6 +329,7 @@ size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
int fillc, size_t bytes); int fillc, size_t bytes);
ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b); ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b);
void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf); void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf);
void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes);
bool buffer_is_zero(const void *buf, size_t len); bool buffer_is_zero(const void *buf, size_t len);

View File

@ -30,6 +30,7 @@ typedef ObjectClass IOThreadClass;
static void *iothread_run(void *opaque) static void *iothread_run(void *opaque)
{ {
IOThread *iothread = opaque; IOThread *iothread = opaque;
bool blocking;
qemu_mutex_lock(&iothread->init_done_lock); qemu_mutex_lock(&iothread->init_done_lock);
iothread->thread_id = qemu_get_thread_id(); iothread->thread_id = qemu_get_thread_id();
@ -38,8 +39,10 @@ static void *iothread_run(void *opaque)
while (!iothread->stopping) { while (!iothread->stopping) {
aio_context_acquire(iothread->ctx); aio_context_acquire(iothread->ctx);
while (!iothread->stopping && aio_poll(iothread->ctx, true)) { blocking = true;
while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) {
/* Progress was made, keep going */ /* Progress was made, keep going */
blocking = false;
} }
aio_context_release(iothread->ctx); aio_context_release(iothread->ctx);
} }

View File

@ -498,24 +498,3 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
{ {
return aio_bh_new(qemu_aio_context, cb, opaque); return aio_bh_new(qemu_aio_context, cb, opaque);
} }
/* Convenience wrapper for the main AioContext: performs one aio_poll() call
 * on qemu_aio_context with blocking set to true and returns its result
 * unchanged.
 */
bool qemu_aio_wait(void)
{
return aio_poll(qemu_aio_context, true);
}
#ifdef CONFIG_POSIX
/* Convenience wrapper for the main AioContext: forwards @fd, @io_read,
 * @io_write and @opaque unchanged to aio_set_fd_handler(), supplying
 * qemu_aio_context as the context. Only built when CONFIG_POSIX is defined.
 */
void qemu_aio_set_fd_handler(int fd,
IOHandler *io_read,
IOHandler *io_write,
void *opaque)
{
aio_set_fd_handler(qemu_aio_context, fd, io_read, io_write, opaque);
}
#endif
/* Convenience wrapper for the main AioContext: forwards @notifier and
 * @io_read unchanged to aio_set_event_notifier(), supplying
 * qemu_aio_context as the context.
 */
void qemu_aio_set_event_notifier(EventNotifier *notifier,
EventNotifierHandler *io_read)
{
aio_set_event_notifier(qemu_aio_context, notifier, io_read);
}

View File

@ -483,7 +483,7 @@ static int do_co_write_zeroes(BlockDriverState *bs, int64_t offset, int count,
co = qemu_coroutine_create(co_write_zeroes_entry); co = qemu_coroutine_create(co_write_zeroes_entry);
qemu_coroutine_enter(co, &data); qemu_coroutine_enter(co, &data);
while (!data.done) { while (!data.done) {
qemu_aio_wait(); aio_poll(bdrv_get_aio_context(bs), true);
} }
if (data.ret < 0) { if (data.ret < 0) {
return data.ret; return data.ret;
@ -2027,7 +2027,7 @@ static const cmdinfo_t resume_cmd = {
static int wait_break_f(BlockDriverState *bs, int argc, char **argv) static int wait_break_f(BlockDriverState *bs, int argc, char **argv)
{ {
while (!bdrv_debug_is_suspended(bs, argv[1])) { while (!bdrv_debug_is_suspended(bs, argv[1])) {
qemu_aio_wait(); aio_poll(bdrv_get_aio_context(bs), true);
} }
return 0; return 0;

View File

@ -33,7 +33,8 @@ status=1 # failure is the default!
_cleanup() _cleanup()
{ {
_cleanup_test_img rm -f "${TEST_IMG}.copy"
_cleanup_test_img
} }
trap "_cleanup; exit \$status" 0 1 2 3 15 trap "_cleanup; exit \$status" 0 1 2 3 15
@ -41,6 +42,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
. ./common.rc . ./common.rc
. ./common.filter . ./common.filter
. ./common.pattern . ./common.pattern
. ./common.qemu
# Any format supporting backing files except vmdk and qcow which do not support # Any format supporting backing files except vmdk and qcow which do not support
# smaller backing files. # smaller backing files.
@ -99,6 +101,29 @@ _check_test_img
# Rebase it on top of its base image # Rebase it on top of its base image
$QEMU_IMG rebase -b "$TEST_IMG.base" "$TEST_IMG" $QEMU_IMG rebase -b "$TEST_IMG.base" "$TEST_IMG"
# Section: run drive_backup on the test image inside a live QEMU, then read
# the resulting copy back and compare it against the expected patterns.
echo
echo block-backup
echo
# Launch QEMU with the test image attached as drive "disk" and talk to it
# through the monitor. _launch_qemu/_send_qemu_cmd are presumably provided by
# common.qemu -- verify there.
qemu_comm_method="monitor"
_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk
h=$QEMU_HANDLE
QEMU_COMM_TIMEOUT=1
# Start the backup into ${TEST_IMG}.copy; the last argument looks like the
# output to wait for (here the monitor prompt) -- TODO confirm.
_send_qemu_cmd $h "drive_backup disk ${TEST_IMG}.copy" "(qemu)"
# Ask for the job list until "No active jobs" shows up; qemu_cmd_repeat=20
# presumably caps the number of attempts -- verify in common.qemu.
qemu_cmd_repeat=20 _send_qemu_cmd $h "info block-jobs" "No active jobs"
_send_qemu_cmd $h 'quit' ""
# Base image sectors
TEST_IMG="${TEST_IMG}.copy" io readv $(( offset )) 512 1024 32
# Image sectors
TEST_IMG="${TEST_IMG}.copy" io readv $(( offset + 512 )) 512 1024 64
# Zero sectors beyond end of base image
TEST_IMG="${TEST_IMG}.copy" io_zero readv $(( offset + 32 * 1024 )) 512 1024 32
_check_test_img _check_test_img
# success, all done # success, all done

File diff suppressed because one or more lines are too long

View File

@ -24,14 +24,6 @@ typedef struct {
bool auto_set; bool auto_set;
} EventNotifierTestData; } EventNotifierTestData;
/* Wait until there are no more BHs or AIO requests.
 *
 * Implemented by calling aio_poll(ctx, true) over and over until it returns
 * false, so the helper depends on a false return from a blocking aio_poll()
 * meaning "nothing left to do".
 */
static void wait_for_aio(void)
{
while (aio_poll(ctx, true)) {
/* Do nothing */
}
}
/* Wait until event notifier becomes inactive */ /* Wait until event notifier becomes inactive */
static void wait_until_inactive(EventNotifierTestData *data) static void wait_until_inactive(EventNotifierTestData *data)
{ {
@ -204,7 +196,9 @@ static void test_bh_schedule10(void)
g_assert(aio_poll(ctx, true)); g_assert(aio_poll(ctx, true));
g_assert_cmpint(data.n, ==, 2); g_assert_cmpint(data.n, ==, 2);
wait_for_aio(); while (data.n < 10) {
aio_poll(ctx, true);
}
g_assert_cmpint(data.n, ==, 10); g_assert_cmpint(data.n, ==, 10);
g_assert(!aio_poll(ctx, false)); g_assert(!aio_poll(ctx, false));
@ -252,7 +246,9 @@ static void test_bh_delete_from_cb(void)
qemu_bh_schedule(data1.bh); qemu_bh_schedule(data1.bh);
g_assert_cmpint(data1.n, ==, 0); g_assert_cmpint(data1.n, ==, 0);
wait_for_aio(); while (data1.n < data1.max) {
aio_poll(ctx, true);
}
g_assert_cmpint(data1.n, ==, data1.max); g_assert_cmpint(data1.n, ==, data1.max);
g_assert(data1.bh == NULL); g_assert(data1.bh == NULL);
@ -287,7 +283,12 @@ static void test_bh_delete_from_cb_many(void)
g_assert_cmpint(data4.n, ==, 1); g_assert_cmpint(data4.n, ==, 1);
g_assert(data1.bh == NULL); g_assert(data1.bh == NULL);
wait_for_aio(); while (data1.n < data1.max ||
data2.n < data2.max ||
data3.n < data3.max ||
data4.n < data4.max) {
aio_poll(ctx, true);
}
g_assert_cmpint(data1.n, ==, data1.max); g_assert_cmpint(data1.n, ==, data1.max);
g_assert_cmpint(data2.n, ==, data2.max); g_assert_cmpint(data2.n, ==, data2.max);
g_assert_cmpint(data3.n, ==, data3.max); g_assert_cmpint(data3.n, ==, data3.max);
@ -306,7 +307,7 @@ static void test_bh_flush(void)
qemu_bh_schedule(data.bh); qemu_bh_schedule(data.bh);
g_assert_cmpint(data.n, ==, 0); g_assert_cmpint(data.n, ==, 0);
wait_for_aio(); g_assert(aio_poll(ctx, true));
g_assert_cmpint(data.n, ==, 1); g_assert_cmpint(data.n, ==, 1);
g_assert(!aio_poll(ctx, false)); g_assert(!aio_poll(ctx, false));
@ -806,17 +807,16 @@ static void test_source_timer_schedule(void)
g_usleep(1 * G_USEC_PER_SEC); g_usleep(1 * G_USEC_PER_SEC);
g_assert_cmpint(data.n, ==, 0); g_assert_cmpint(data.n, ==, 0);
g_assert(g_main_context_iteration(NULL, false)); g_assert(g_main_context_iteration(NULL, true));
g_assert_cmpint(data.n, ==, 1); g_assert_cmpint(data.n, ==, 1);
expiry += data.ns;
/* The comment above was not kidding when it said this wakes up itself */ while (data.n < 2) {
do { g_main_context_iteration(NULL, true);
g_assert(g_main_context_iteration(NULL, true)); }
} while (qemu_clock_get_ns(data.clock_type) <= expiry);
g_usleep(1 * G_USEC_PER_SEC);
g_main_context_iteration(NULL, false);
g_assert_cmpint(data.n, ==, 2); g_assert_cmpint(data.n, ==, 2);
g_assert(qemu_clock_get_ns(data.clock_type) > expiry);
aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL); aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL);
close(pipefd[0]); close(pipefd[0]);

View File

@ -83,7 +83,7 @@ static void co_test_cb(void *opaque)
data->ret = 0; data->ret = 0;
active--; active--;
/* The test continues in test_submit_co, after qemu_aio_wait_all... */ /* The test continues in test_submit_co, after aio_poll... */
} }
static void test_submit_co(void) static void test_submit_co(void)
@ -98,7 +98,7 @@ static void test_submit_co(void)
g_assert_cmpint(active, ==, 1); g_assert_cmpint(active, ==, 1);
g_assert_cmpint(data.ret, ==, -EINPROGRESS); g_assert_cmpint(data.ret, ==, -EINPROGRESS);
/* qemu_aio_wait_all will execute the rest of the coroutine. */ /* aio_poll will execute the rest of the coroutine. */
while (data.ret == -EINPROGRESS) { while (data.ret == -EINPROGRESS) {
aio_poll(ctx, true); aio_poll(ctx, true);

View File

@ -550,3 +550,16 @@ size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
return total; return total;
} }
/*
 * Shorten @qiov by @bytes using iov_discard_back().
 *
 * @bytes must not exceed qiov->size; this is asserted, as is the
 * expectation that iov_discard_back() reports having discarded exactly
 * @bytes. On return qiov->niov holds the element count left by
 * iov_discard_back() and qiov->size has been reduced by @bytes.
 */
void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes)
{
    unsigned int vec_count = qiov->niov;
    size_t dropped;

    assert(bytes <= qiov->size);

    /* The element count is passed by pointer so the callee can adjust it. */
    dropped = iov_discard_back(qiov->iov, &vec_count, bytes);
    assert(dropped == bytes);

    qiov->size -= bytes;
    qiov->niov = vec_count;
}