From a1ca3ed5ece383de86994b04e2fef01dad7e7e95 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:10 +0300 Subject: [PATCH 01/27] block: enhance QEMUIOVector structure Add a possibility of embedded iovec, for cases when we need only one local iov. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-id: 20190218140926.333779-2-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-2-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- include/qemu/iov.h | 64 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/include/qemu/iov.h b/include/qemu/iov.h index 5f433c7768..48b45987b7 100644 --- a/include/qemu/iov.h +++ b/include/qemu/iov.h @@ -133,10 +133,70 @@ size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt, typedef struct QEMUIOVector { struct iovec *iov; int niov; - int nalloc; - size_t size; + + /* + * For external @iov (qemu_iovec_init_external()) or allocated @iov + * (qemu_iovec_init()), @size is the cumulative size of iovecs and + * @local_iov is invalid and unused. + * + * For embedded @iov (QEMU_IOVEC_INIT_BUF() or qemu_iovec_init_buf()), + * @iov is equal to &@local_iov, and @size is valid, as it has same + * offset and type as @local_iov.iov_len, which is guaranteed by + * static assertion below. + * + * @nalloc is always valid and is -1 both for embedded and external + * cases. It is included in the union only to ensure the padding prior + * to the @size field will not result in a 0-length array. + */ + union { + struct { + int nalloc; + struct iovec local_iov; + }; + struct { + char __pad[sizeof(int) + offsetof(struct iovec, iov_len)]; + size_t size; + }; + }; } QEMUIOVector; +QEMU_BUILD_BUG_ON(offsetof(QEMUIOVector, size) != + offsetof(QEMUIOVector, local_iov.iov_len)); + +#define QEMU_IOVEC_INIT_BUF(self, buf, len) \ +{ \ + .iov = &(self).local_iov, \ + .niov = 1, \ + .nalloc = -1, \ + .local_iov = { \ + .iov_base = (void *)(buf), /* cast away const */ \ + .iov_len = (len), \ + }, \ +} + +/* + * qemu_iovec_init_buf + * + * Initialize embedded QEMUIOVector. + * + * Note: "const" is used over @buf pointer to make it simple to pass + * const pointers, appearing in read functions. Then this "const" is + * cast away by QEMU_IOVEC_INIT_BUF(). + */ +static inline void qemu_iovec_init_buf(QEMUIOVector *qiov, + const void *buf, size_t len) +{ + *qiov = (QEMUIOVector) QEMU_IOVEC_INIT_BUF(*qiov, buf, len); +} + +static inline void *qemu_iovec_buf(QEMUIOVector *qiov) +{ + /* Only supports embedded iov */ + assert(qiov->nalloc == -1 && qiov->iov == &qiov->local_iov); + + return qiov->local_iov.iov_base; +} + void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); From 0d93ed08454e76fa943f6f71964362296d41b12e Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:11 +0300 Subject: [PATCH 02/27] block/io: use qemu_iovec_init_buf Use new qemu_iovec_init_buf() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. While being here, use qemu_try_blockalign0 as well. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-3-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-3-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- block/io.c | 89 ++++++++++++------------------------------------------ 1 file changed, 20 insertions(+), 69 deletions(-) diff --git a/block/io.c b/block/io.c index 213ca03d8d..2ba603c7bc 100644 --- a/block/io.c +++ b/block/io.c @@ -843,17 +843,13 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset, static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf, int nb_sectors, bool is_write, BdrvRequestFlags flags) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *)buf, - .iov_len = nb_sectors * BDRV_SECTOR_SIZE, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, + nb_sectors * BDRV_SECTOR_SIZE); if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { return -EINVAL; } - qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS, &qiov, is_write, flags); } @@ -880,13 +876,8 @@ int bdrv_write(BdrvChild *child, int64_t sector_num, int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = bytes, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes); - qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_prwv_co(child, offset, &qiov, true, BDRV_REQ_ZERO_WRITE | flags); } @@ -950,17 +941,12 @@ int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *)buf, - .iov_len = bytes, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); if (bytes < 0) { return -EINVAL; } - qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_preadv(child, offset, &qiov); } @@ -978,17 +964,12 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *) buf, - .iov_len = bytes, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); if (bytes < 0) { return -EINVAL; } - qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_pwritev(child, offset, &qiov); } @@ -1165,7 +1146,6 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, void *bounce_buffer; BlockDriver *drv = bs->drv; - struct iovec iov; QEMUIOVector local_qiov; int64_t cluster_offset; int64_t cluster_bytes; @@ -1230,9 +1210,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, if (ret <= 0) { /* Must copy-on-read; use the bounce buffer */ - iov.iov_base = bounce_buffer; - iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER); - qemu_iovec_init_external(&local_qiov, &iov, 1); + pnum = MIN(pnum, MAX_BOUNCE_BUFFER); + qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum); ret = bdrv_driver_preadv(bs, cluster_offset, pnum, &local_qiov, 0); @@ -1477,7 +1456,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, { BlockDriver *drv = bs->drv; QEMUIOVector qiov; - struct iovec iov = {0}; + void *buf = NULL; int ret = 0; bool need_flush = false; int head = 0; @@ -1547,16 +1526,14 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, need_flush = true; } num = MIN(num, max_transfer); - iov.iov_len = num; - if (iov.iov_base == NULL) { - iov.iov_base = qemu_try_blockalign(bs, num); - if (iov.iov_base == NULL) { + if (buf == NULL) { + buf = qemu_try_blockalign0(bs, num); + if (buf == NULL) { ret = -ENOMEM; goto fail; } - memset(iov.iov_base, 0, num); } - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, buf, num); ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags); @@ -1564,8 +1541,8 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, * all future requests. */ if (num < max_transfer) { - qemu_vfree(iov.iov_base); - iov.iov_base = NULL; + qemu_vfree(buf); + buf = NULL; } } @@ -1577,7 +1554,7 @@ fail: if (ret == 0 && need_flush) { ret = bdrv_co_flush(bs); } - qemu_vfree(iov.iov_base); + qemu_vfree(buf); return ret; } @@ -1763,7 +1740,6 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, BlockDriverState *bs = child->bs; uint8_t *buf = NULL; QEMUIOVector local_qiov; - struct iovec iov; uint64_t align = bs->bl.request_alignment; unsigned int head_padding_bytes, tail_padding_bytes; int ret = 0; @@ -1775,11 +1751,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, assert(flags & BDRV_REQ_ZERO_WRITE); if (head_padding_bytes || tail_padding_bytes) { buf = qemu_blockalign(bs, align); - iov = (struct iovec) { - .iov_base = buf, - .iov_len = align, - }; - qemu_iovec_init_external(&local_qiov, &iov, 1); + qemu_iovec_init_buf(&local_qiov, buf, align); } if (head_padding_bytes) { uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); @@ -1885,17 +1857,12 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, if (offset & (align - 1)) { QEMUIOVector head_qiov; - struct iovec head_iov; mark_request_serialising(&req, align); wait_serialising_requests(&req); head_buf = qemu_blockalign(bs, align); - head_iov = (struct iovec) { - .iov_base = head_buf, - .iov_len = align, - }; - qemu_iovec_init_external(&head_qiov, &head_iov, 1); + qemu_iovec_init_buf(&head_qiov, head_buf, align); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, @@ -1924,7 +1891,6 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, if ((offset + bytes) & (align - 1)) { QEMUIOVector tail_qiov; - struct iovec tail_iov; size_t tail_bytes; bool waited; @@ -1933,11 +1899,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, assert(!waited || !use_local_qiov); tail_buf = qemu_blockalign(bs, align); - tail_iov = (struct iovec) { - .iov_base = tail_buf, - .iov_len = align, - }; - qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); + qemu_iovec_init_buf(&tail_qiov, tail_buf, align); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), @@ -2468,15 +2430,9 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *) buf, - .iov_len = size, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); int ret; - qemu_iovec_init_external(&qiov, &iov, 1); - ret = bdrv_writev_vmstate(bs, &qiov, pos); if (ret < 0) { return ret; @@ -2493,14 +2449,9 @@ int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = buf, - .iov_len = size, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); int ret; - qemu_iovec_init_external(&qiov, &iov, 1); ret = bdrv_readv_vmstate(bs, &qiov, pos); if (ret < 0) { return ret; From ae5a9592ce0d5c11e40667f217b2e2ceba33927f Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:12 +0300 Subject: [PATCH 03/27] block/block-backend: use QEMU_IOVEC_INIT_BUF Use new QEMU_IOVEC_INIT_BUF() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-4-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-4-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- block/block-backend.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index f6ea824308..6cc25569ef 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1204,17 +1204,8 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, int64_t bytes, CoroutineEntry co_entry, BdrvRequestFlags flags) { - QEMUIOVector qiov; - struct iovec iov; - BlkRwCo rwco; - - iov = (struct iovec) { - .iov_base = buf, - .iov_len = bytes, - }; - qemu_iovec_init_external(&qiov, &iov, 1); - - rwco = (BlkRwCo) { + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + BlkRwCo rwco = { .blk = blk, .offset = offset, .iobuf = &qiov, From 31e0a6e1af78fcca60fa30c345fb7b54d49da0c9 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:13 +0300 Subject: [PATCH 04/27] block/backup: use qemu_iovec_init_buf Use new qemu_iovec_init_buf() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-5-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-5-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- block/backup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/block/backup.c b/block/backup.c index 435414e964..9988753249 100644 --- a/block/backup.c +++ b/block/backup.c @@ -107,7 +107,6 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, void **bounce_buffer) { int ret; - struct iovec iov; QEMUIOVector qiov; BlockBackend *blk = job->common.blk; int nbytes; @@ -119,9 +118,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, if (!*bounce_buffer) { *bounce_buffer = blk_blockalign(blk, job->cluster_size); } - iov.iov_base = *bounce_buffer; - iov.iov_len = nbytes; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, *bounce_buffer, nbytes); ret = blk_co_preadv(blk, start, qiov.size, &qiov, read_flags); if (ret < 0) { From ee7a883ace6f19d3b3d2c2ef7e4ca86232ab523a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:14 +0300 Subject: [PATCH 05/27] block/commit: use QEMU_IOVEC_INIT_BUF Use new QEMU_IOVEC_INIT_BUF() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-6-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-6-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- block/commit.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/block/commit.c b/block/commit.c index 53148e610b..d500a93068 100644 --- a/block/commit.c +++ b/block/commit.c @@ -47,14 +47,9 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, void *buf) { int ret = 0; - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = buf, - .iov_len = bytes, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); assert(bytes < SIZE_MAX); - qemu_iovec_init_external(&qiov, &iov, 1); ret = blk_co_preadv(bs, offset, qiov.size, &qiov, 0); if (ret < 0) { From 8040446d3080ebd446c619364f60b7643732a6ab Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:15 +0300 Subject: [PATCH 06/27] block/stream: use QEMU_IOVEC_INIT_BUF Use new QEMU_IOVEC_INIT_BUF() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-7-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-7-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- block/stream.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/block/stream.c b/block/stream.c index 7a49ac0992..e14579ff80 100644 --- a/block/stream.c +++ b/block/stream.c @@ -41,14 +41,9 @@ static int coroutine_fn stream_populate(BlockBackend *blk, int64_t offset, uint64_t bytes, void *buf) { - struct iovec iov = { - .iov_base = buf, - .iov_len = bytes, - }; - QEMUIOVector qiov; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); assert(bytes < SIZE_MAX); - qemu_iovec_init_external(&qiov, &iov, 1); /* Copy-on-read the unallocated clusters */ return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ); From b2589d66b40ed0595d5b4c16827324d0f98c5106 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:16 +0300 Subject: [PATCH 07/27] block/parallels: use QEMU_IOVEC_INIT_BUF Use new QEMU_IOVEC_INIT_BUF() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-8-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-8-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index cc9445879d..15bc97b759 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -220,23 +220,20 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, if (bs->backing) { int64_t nb_cow_sectors = to_allocate * s->tracks; int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS; - QEMUIOVector qiov; - struct iovec iov = { - .iov_len = nb_cow_bytes, - .iov_base = qemu_blockalign(bs, nb_cow_bytes) - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = + QEMU_IOVEC_INIT_BUF(qiov, qemu_blockalign(bs, nb_cow_bytes), + nb_cow_bytes); ret = bdrv_co_preadv(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE, nb_cow_bytes, &qiov, 0); if (ret < 0) { - qemu_vfree(iov.iov_base); + qemu_vfree(qemu_iovec_buf(&qiov)); return ret; } ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE, nb_cow_bytes, &qiov, 0); - qemu_vfree(iov.iov_base); + qemu_vfree(qemu_iovec_buf(&qiov)); if (ret < 0) { return ret; } From 30d780f8fe98f4a417f505d5b7354ba5635bd193 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:17 +0300 Subject: [PATCH 08/27] block/qcow: use qemu_iovec_init_buf Use new qemu_iovec_init_buf() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-9-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-9-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- block/qcow.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/block/qcow.c b/block/qcow.c index 0a235bf393..409c700d33 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -628,7 +628,6 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, int offset_in_cluster; int ret = 0, n; uint64_t cluster_offset; - struct iovec hd_iov; QEMUIOVector hd_qiov; uint8_t *buf; void *orig_buf; @@ -661,9 +660,7 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, if (!cluster_offset) { if (bs->backing) { /* read from the base image */ - hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n; - qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); + qemu_iovec_init_buf(&hd_qiov, buf, n); qemu_co_mutex_unlock(&s->lock); /* qcow2 emits this on bs->file instead of bs->backing */ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); @@ -688,9 +685,7 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, ret = -EIO; break; } - hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n; - qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); + qemu_iovec_init_buf(&hd_qiov, buf, n); qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); ret = bdrv_co_preadv(bs->file, cluster_offset + offset_in_cluster, @@ -733,7 +728,6 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset, int offset_in_cluster; uint64_t cluster_offset; int ret = 0, n; - struct iovec hd_iov; QEMUIOVector hd_qiov; uint8_t *buf; void *orig_buf; @@ -779,9 +773,7 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset, } } - hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n; - qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); + qemu_iovec_init_buf(&hd_qiov, buf, n); qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster, @@ -1062,7 +1054,6 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, { BDRVQcowState *s = bs->opaque; QEMUIOVector hd_qiov; - struct iovec iov; z_stream strm; int ret, out_len; uint8_t *buf, *out_buf; @@ -1128,11 +1119,7 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, } cluster_offset &= s->cluster_offset_mask; - iov = (struct iovec) { - .iov_base = out_buf, - .iov_len = out_len, - }; - qemu_iovec_init_external(&hd_qiov, &iov, 1); + qemu_iovec_init_buf(&hd_qiov, out_buf, out_len); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0); if (ret < 0) { From c793d4ff20b3470a0110638b46e83de0908e1652 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:18 +0300 Subject: [PATCH 09/27] block/qcow2: use qemu_iovec_init_buf Use new qemu_iovec_init_buf() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-10-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-10-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 65a54c9ac6..b6d475229e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3894,7 +3894,6 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, { BDRVQcow2State *s = bs->opaque; QEMUIOVector hd_qiov; - struct iovec iov; int ret; size_t out_len; uint8_t *buf, *out_buf; @@ -3960,11 +3959,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, goto fail; } - iov = (struct iovec) { - .iov_base = out_buf, - .iov_len = out_len, - }; - qemu_iovec_init_external(&hd_qiov, &iov, 1); + qemu_iovec_init_buf(&hd_qiov, out_buf, out_len); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0); @@ -3990,7 +3985,6 @@ qcow2_co_preadv_compressed(BlockDriverState *bs, int ret = 0, csize, nb_csectors; uint64_t coffset; uint8_t *buf, *out_buf; - struct iovec iov; QEMUIOVector local_qiov; int offset_in_cluster = offset_into_cluster(s, offset); @@ -4002,9 +3996,7 @@ qcow2_co_preadv_compressed(BlockDriverState *bs, if (!buf) { return -ENOMEM; } - iov.iov_base = buf; - iov.iov_len = csize; - qemu_iovec_init_external(&local_qiov, &iov, 1); + qemu_iovec_init_buf(&local_qiov, buf, csize); out_buf = qemu_blockalign(bs, s->cluster_size); From 342544f98b8e27adef17bb2ca7964c6efb3698d6 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:19 +0300 Subject: [PATCH 10/27] block/qed: use qemu_iovec_init_buf Use new qemu_iovec_init_buf() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-11-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-11-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- block/qed-table.c | 16 +++------------- block/qed.c | 31 +++++++++---------------------- 2 files changed, 12 insertions(+), 35 deletions(-) diff --git a/block/qed-table.c b/block/qed-table.c index 7df5680adb..c497bd4aec 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -21,16 +21,11 @@ /* Called with table_lock held. */ static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table) { - QEMUIOVector qiov; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF( + qiov, table->offsets, s->header.cluster_size * s->header.table_size); int noffsets; int i, ret; - struct iovec iov = { - .iov_base = table->offsets, - .iov_len = s->header.cluster_size * s->header.table_size, - }; - qemu_iovec_init_external(&qiov, &iov, 1); - trace_qed_read_table(s, offset, table); qemu_co_mutex_unlock(&s->table_lock); @@ -71,7 +66,6 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1; unsigned int start, end, i; QEDTable *new_table; - struct iovec iov; QEMUIOVector qiov; size_t len_bytes; int ret; @@ -85,11 +79,7 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, len_bytes = (end - start) * sizeof(uint64_t); new_table = qemu_blockalign(s->bs, len_bytes); - iov = (struct iovec) { - .iov_base = new_table->offsets, - .iov_len = len_bytes, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, new_table->offsets, len_bytes); /* Byteswap table */ for (i = start; i < end; i++) { diff --git a/block/qed.c b/block/qed.c index 1280870024..c5e6d6ad41 100644 --- a/block/qed.c +++ b/block/qed.c @@ -113,18 +113,13 @@ static int coroutine_fn qed_write_header(BDRVQEDState *s) int nsectors = DIV_ROUND_UP(sizeof(QEDHeader), BDRV_SECTOR_SIZE); size_t len = nsectors * BDRV_SECTOR_SIZE; uint8_t *buf; - struct iovec iov; QEMUIOVector qiov; int ret; assert(s->allocating_acb || s->allocating_write_reqs_plugged); buf = qemu_blockalign(s->bs, len); - iov = (struct iovec) { - .iov_base = buf, - .iov_len = len, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, buf, len); ret = bdrv_co_preadv(s->bs->file, 0, qiov.size, &qiov, 0); if (ret < 0) { @@ -913,7 +908,6 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, { QEMUIOVector qiov; QEMUIOVector *backing_qiov = NULL; - struct iovec iov; int ret; /* Skip copy entirely if there is no work to do */ @@ -921,11 +915,7 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, return 0; } - iov = (struct iovec) { - .iov_base = qemu_blockalign(s->bs, len), - .iov_len = len, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, qemu_blockalign(s->bs, len), len); ret = qed_read_backing_file(s, pos, &qiov, &backing_qiov); @@ -946,7 +936,7 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, } ret = 0; out: - qemu_vfree(iov.iov_base); + qemu_vfree(qemu_iovec_buf(&qiov)); return ret; } @@ -1447,8 +1437,12 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, BdrvRequestFlags flags) { BDRVQEDState *s = bs->opaque; - QEMUIOVector qiov; - struct iovec iov; + + /* + * Zero writes start without an I/O buffer. If a buffer becomes necessary + * then it will be allocated during request processing. + */ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes); /* Fall back if the request is not aligned */ if (qed_offset_into_cluster(s, offset) || @@ -1456,13 +1450,6 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, return -ENOTSUP; } - /* Zero writes start without an I/O buffer. If a buffer becomes necessary - * then it will be allocated during request processing. - */ - iov.iov_base = NULL; - iov.iov_len = bytes; - - qemu_iovec_init_external(&qiov, &iov, 1); return qed_co_request(bs, offset >> BDRV_SECTOR_BITS, &qiov, bytes >> BDRV_SECTOR_BITS, QED_AIOCB_WRITE | QED_AIOCB_ZERO); From 199d95b043f4d160deb6162527b5141bd6b7bdc9 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:20 +0300 Subject: [PATCH 11/27] block/vmdk: use qemu_iovec_init_buf Use new qemu_iovec_init_buf() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-12-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-12-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- block/vmdk.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index 096e8eb662..41048741cd 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1371,7 +1371,6 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, VmdkGrainMarker *data = NULL; uLongf buf_len; QEMUIOVector local_qiov; - struct iovec iov; int64_t write_offset; int64_t write_end_sector; @@ -1399,11 +1398,7 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, data->size = cpu_to_le32(buf_len); n_bytes = buf_len + sizeof(VmdkGrainMarker); - iov = (struct iovec) { - .iov_base = data, - .iov_len = n_bytes, - }; - qemu_iovec_init_external(&local_qiov, &iov, 1); + qemu_iovec_init_buf(&local_qiov, data, n_bytes); BLKDBG_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED); } else { From c075a0af22442e88226a6cef22da3fafccfb5d7e Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:21 +0300 Subject: [PATCH 12/27] qemu-img: use qemu_iovec_init_buf Use new qemu_iovec_init_buf() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-13-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-13-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- qemu-img.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 25288c4d18..7853935049 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1670,7 +1670,6 @@ static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num, { int n, ret; QEMUIOVector qiov; - struct iovec iov; assert(nb_sectors <= s->buf_sectors); while (nb_sectors > 0) { @@ -1686,9 +1685,7 @@ static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num, bs_sectors = s->src_sectors[src_cur]; n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset)); - iov.iov_base = buf; - iov.iov_len = n << BDRV_SECTOR_BITS; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, buf, n << BDRV_SECTOR_BITS); ret = blk_co_preadv( blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS, @@ -1712,7 +1709,6 @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, { int ret; QEMUIOVector qiov; - struct iovec iov; while (nb_sectors > 0) { int n = nb_sectors; @@ -1740,9 +1736,7 @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, (s->compressed && !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE))) { - iov.iov_base = buf; - iov.iov_len = n << BDRV_SECTOR_BITS; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, buf, n << BDRV_SECTOR_BITS); ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, n << BDRV_SECTOR_BITS, &qiov, flags); From f556f37b110edbee7b59ba6e2de7a6b84e90cab3 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:22 +0300 Subject: [PATCH 13/27] migration/block: use qemu_iovec_init_buf Use new qemu_iovec_init_buf() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-14-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-14-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- migration/block.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/migration/block.c b/migration/block.c index 0e24e18d13..83c633fb3f 100644 --- a/migration/block.c +++ b/migration/block.c @@ -83,7 +83,6 @@ typedef struct BlkMigBlock { BlkMigDevState *bmds; int64_t sector; int nr_sectors; - struct iovec iov; QEMUIOVector qiov; BlockAIOCB *aiocb; @@ -314,9 +313,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) blk->sector = cur_sector; blk->nr_sectors = nr_sectors; - blk->iov.iov_base = blk->buf; - blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; - qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); + qemu_iovec_init_buf(&blk->qiov, blk->buf, nr_sectors * BDRV_SECTOR_SIZE); blk_mig_lock(); block_mig_state.submitted++; @@ -556,9 +553,8 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, blk->nr_sectors = nr_sectors; if (is_async) { - blk->iov.iov_base = blk->buf; - blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; - qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); + qemu_iovec_init_buf(&blk->qiov, blk->buf, + nr_sectors * BDRV_SECTOR_SIZE); blk->aiocb = blk_aio_preadv(bmds->blk, sector * BDRV_SECTOR_SIZE, From 405d8fe0ba62ddb5d027ad7bc77c6ee88460ad23 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:23 +0300 Subject: [PATCH 14/27] tests/test-bdrv-drain: use QEMU_IOVEC_INIT_BUF Use new QEMU_IOVEC_INIT_BUF() instead of qemu_iovec_init_external( ... , 1), which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-15-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-15-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- tests/test-bdrv-drain.c | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index ee1740ff06..821be405f0 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -204,12 +204,7 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive) BlockAIOCB *acb; int aio_ret; - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, @@ -670,12 +665,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) AioContext *ctx_a = iothread_get_aio_context(a); AioContext *ctx_b = iothread_get_aio_context(b); - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); /* bdrv_drain_all() may only be called from the main loop thread */ if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) { @@ -1148,13 +1138,7 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) BlockDriverState *bs = blk_bs(blk); BDRVTestTopState *tts = bs->opaque; void *buffer = g_malloc(65536); - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = buffer, - .iov_len = 65536, - }; - - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536); /* Pretend some internal write operation from parent to child. * Important: We have to read from the child, not from the parent! @@ -1365,12 +1349,7 @@ static void test_detach_indirect(bool by_parent_cb) BdrvChild *child_a, *child_b; BlockAIOCB *acb; - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); if (!by_parent_cb) { detach_by_driver_cb_role = child_file; From e5863d49e41b1b4b695f854c711a55d2584ee367 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:24 +0300 Subject: [PATCH 15/27] hw/ide: drop iov field from IDEState @iov is used only to initialize @qiov. Let's use new qemu_iovec_init_buf() instead, which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-16-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-16-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- hw/ide/atapi.c | 9 ++++----- hw/ide/core.c | 8 ++------ include/hw/ide/internal.h | 1 - 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index 39e473f9c2..4de86555d9 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -174,16 +174,15 @@ static void cd_read_sector_cb(void *opaque, int ret) static int cd_read_sector(IDEState *s) { + void *buf; + if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) { block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_READ); return -EINVAL; } - s->iov.iov_base = (s->cd_sector_size == 2352) ? - s->io_buffer + 16 : s->io_buffer; - - s->iov.iov_len = ATAPI_SECTOR_SIZE; - qemu_iovec_init_external(&s->qiov, &s->iov, 1); + buf = (s->cd_sector_size == 2352) ? s->io_buffer + 16 : s->io_buffer; + qemu_iovec_init_buf(&s->qiov, buf, ATAPI_SECTOR_SIZE); trace_cd_read_sector(s->lba); diff --git a/hw/ide/core.c b/hw/ide/core.c index 84832008b8..13f8f78ca6 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -774,9 +774,7 @@ static void ide_sector_read(IDEState *s) return; } - s->iov.iov_base = s->io_buffer; - s->iov.iov_len = n * BDRV_SECTOR_SIZE; - qemu_iovec_init_external(&s->qiov, &s->iov, 1); + qemu_iovec_init_buf(&s->qiov, s->io_buffer, n * BDRV_SECTOR_SIZE); block_acct_start(blk_get_stats(s->blk), &s->acct, n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); @@ -1045,9 +1043,7 @@ static void ide_sector_write(IDEState *s) return; } - s->iov.iov_base = s->io_buffer; - s->iov.iov_len = n * BDRV_SECTOR_SIZE; - qemu_iovec_init_external(&s->qiov, &s->iov, 1); + qemu_iovec_init_buf(&s->qiov, s->io_buffer, n * BDRV_SECTOR_SIZE); block_acct_start(blk_get_stats(s->blk), &s->acct, n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE); diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h index 880413ddc7..fa99486d7a 100644 --- a/include/hw/ide/internal.h +++ b/include/hw/ide/internal.h @@ -405,7 +405,6 @@ struct IDEState { int atapi_dma; /* true if dma is requested for the packet cmd */ BlockAcctCookie acct; BlockAIOCB *pio_aiocb; - struct iovec iov; QEMUIOVector qiov; QLIST_HEAD(, IDEBufferedRequest) buffered_requests; /* ATA DMA state */ From 5bbe9325a07520dbeadcc96b02a3b8f471f73e4a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:25 +0300 Subject: [PATCH 16/27] hw/ide: drop iov field from IDEBufferedRequest @iov is used only to initialize @qiov. Let's use new qemu_iovec_init_buf() instead, which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-17-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-17-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- hw/ide/core.c | 11 ++++++----- include/hw/ide/internal.h | 1 - 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 13f8f78ca6..6afadf894f 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -629,13 +629,15 @@ static void ide_buffered_readv_cb(void *opaque, int ret) IDEBufferedRequest *req = opaque; if (!req->orphaned) { if (!ret) { - qemu_iovec_from_buf(req->original_qiov, 0, req->iov.iov_base, + assert(req->qiov.size == req->original_qiov->size); + qemu_iovec_from_buf(req->original_qiov, 0, + req->qiov.local_iov.iov_base, req->original_qiov->size); } req->original_cb(req->original_opaque, ret); } QLIST_REMOVE(req, list); - qemu_vfree(req->iov.iov_base); + qemu_vfree(qemu_iovec_buf(&req->qiov)); g_free(req); } @@ -660,9 +662,8 @@ BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, req->original_qiov = iov; req->original_cb = cb; req->original_opaque = opaque; - req->iov.iov_base = qemu_blockalign(blk_bs(s->blk), iov->size); - req->iov.iov_len = iov->size; - qemu_iovec_init_external(&req->qiov, &req->iov, 1); + qemu_iovec_init_buf(&req->qiov, blk_blockalign(s->blk, iov->size), + iov->size); aioreq = blk_aio_preadv(s->blk, sector_num << BDRV_SECTOR_BITS, &req->qiov, 0, ide_buffered_readv_cb, req); diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h index fa99486d7a..1b02bb9151 100644 --- a/include/hw/ide/internal.h +++ b/include/hw/ide/internal.h @@ -346,7 +346,6 @@ extern const char *IDE_DMA_CMD_lookup[IDE_DMA__COUNT]; typedef struct IDEBufferedRequest { QLIST_ENTRY(IDEBufferedRequest) list; - struct iovec iov; QEMUIOVector qiov; QEMUIOVector *original_qiov; BlockCompletionFunc *original_cb; From 9942586b3f1879244d51de4efb44e18b93514b9a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 18 Feb 2019 17:09:26 +0300 Subject: [PATCH 17/27] hw/ide: drop iov field from IDEDMA @iov is used only to initialize @qiov. Let's use new qemu_iovec_init_buf() instead, which simplifies the code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 20190218140926.333779-18-vsementsov@virtuozzo.com Message-Id: <20190218140926.333779-18-vsementsov@virtuozzo.com> Signed-off-by: Stefan Hajnoczi --- hw/ide/atapi.c | 5 ++--- include/hw/ide/internal.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index 4de86555d9..1b0f66cc08 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -420,9 +420,8 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret) data_offset = 0; } trace_ide_atapi_cmd_read_dma_cb_aio(s, s->lba, n); - s->bus->dma->iov.iov_base = (void *)(s->io_buffer + data_offset); - s->bus->dma->iov.iov_len = n * ATAPI_SECTOR_SIZE; - qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1); + qemu_iovec_init_buf(&s->bus->dma->qiov, s->io_buffer + data_offset, + n * ATAPI_SECTOR_SIZE); s->bus->dma->aiocb = ide_buffered_readv(s, (int64_t)s->lba << 2, &s->bus->dma->qiov, n * 4, diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h index 1b02bb9151..8efd03132b 100644 --- a/include/hw/ide/internal.h +++ b/include/hw/ide/internal.h @@ -455,7 +455,6 @@ struct IDEDMAOps { struct IDEDMA { const struct IDEDMAOps *ops; - struct iovec iov; QEMUIOVector qiov; BlockAIOCB *aiocb; }; From 00f639fb8f3e2b34f0e9767619cb16604a844a5f Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 Feb 2019 11:33:05 +0100 Subject: [PATCH 18/27] virtio-blk: add acct_failed param to virtio_blk_handle_rw_error() We add acct_failed param in order to use virtio_blk_handle_rw_error() also when is not required to call block_acct_failed(). (eg. a discard operation is failed) Reviewed-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Message-id: 20190221103314.58500-2-sgarzare@redhat.com Message-Id: <20190221103314.58500-2-sgarzare@redhat.com> Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index cf7f47eaba..afedf48ca6 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -65,7 +65,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) } static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, - bool is_read) + bool is_read, bool acct_failed) { VirtIOBlock *s = req->dev; BlockErrorAction action = blk_get_error_action(s->blk, is_read, error); @@ -78,7 +78,9 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, s->rq = req; } else if (action == BLOCK_ERROR_ACTION_REPORT) { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); - block_acct_failed(blk_get_stats(s->blk), &req->acct); + if (acct_failed) { + block_acct_failed(blk_get_stats(s->blk), &req->acct); + } virtio_blk_free_request(req); } @@ -116,7 +118,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) * the memory until the request is completed (which will * happen on the other side of the migration). */ - if (virtio_blk_handle_rw_error(req, -ret, is_read)) { + if (virtio_blk_handle_rw_error(req, -ret, is_read, true)) { continue; } } @@ -135,7 +137,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret) aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); if (ret) { - if (virtio_blk_handle_rw_error(req, -ret, 0)) { + if (virtio_blk_handle_rw_error(req, -ret, 0, true)) { goto out; } } From bbe8bd4d85d80442f87774d7bffaca11f2c02b9b Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 Feb 2019 11:33:06 +0100 Subject: [PATCH 19/27] virtio-blk: add host_features field in VirtIOBlock Since configurable features for virtio-blk are growing, this patch adds host_features field in the struct VirtIOBlock. (as in virtio-net) In this way, we can avoid to add new fields for new properties and we can directly set VIRTIO_BLK_F* flags in the host_features. We update "config-wce" and "scsi" property definition to use the new host_features field without change the behaviour. Suggested-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Stefano Garzarella Message-id: 20190221103314.58500-3-sgarzare@redhat.com Message-Id: <20190221103314.58500-3-sgarzare@redhat.com> Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 16 +++++++++------- include/hw/virtio/virtio-blk.h | 3 +-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index afedf48ca6..7813237756 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -245,7 +245,7 @@ static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req) */ scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base; - if (!blk->conf.scsi) { + if (!virtio_has_feature(blk->host_features, VIRTIO_BLK_F_SCSI)) { status = VIRTIO_BLK_S_UNSUPP; goto fail; } @@ -785,12 +785,15 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, { VirtIOBlock *s = VIRTIO_BLK(vdev); + /* Firstly sync all virtio-blk possible supported features */ + features |= s->host_features; + virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX); virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) { - if (s->conf.scsi) { + if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_SCSI)) { error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0"); return 0; } @@ -799,9 +802,6 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, virtio_add_feature(&features, VIRTIO_BLK_F_SCSI); } - if (s->conf.config_wce) { - virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE); - } if (blk_enable_write_cache(s->blk)) { virtio_add_feature(&features, VIRTIO_BLK_F_WCE); } @@ -1015,9 +1015,11 @@ static Property virtio_blk_properties[] = { DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf), DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf), DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial), - DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true), + DEFINE_PROP_BIT64("config-wce", VirtIOBlock, host_features, + VIRTIO_BLK_F_CONFIG_WCE, true), #ifdef __linux__ - DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, false), + DEFINE_PROP_BIT64("scsi", VirtIOBlock, host_features, + VIRTIO_BLK_F_SCSI, false), #endif DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, true), diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 5117431d96..f7345b0511 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -35,8 +35,6 @@ struct VirtIOBlkConf BlockConf conf; IOThread *iothread; char *serial; - uint32_t scsi; - uint32_t config_wce; uint32_t request_merging; uint16_t num_queues; uint16_t queue_size; @@ -57,6 +55,7 @@ typedef struct VirtIOBlock { bool dataplane_disabled; bool dataplane_started; struct VirtIOBlockDataPlane *dataplane; + uint64_t host_features; } VirtIOBlock; typedef struct VirtIOBlockReq { From 5c81161f804144b146607f890e84613a4cbad95c Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 Feb 2019 11:33:07 +0100 Subject: [PATCH 20/27] virtio-blk: add "discard" and "write-zeroes" properties In order to avoid migration issues, we enable DISCARD and WRITE_ZEROES features only for machine type >= 4.0 As discussed with Michael S. Tsirkin and Stefan Hajnoczi on the list [1], DISCARD operation should not have security implications (eg. page cache attacks), so we can enable it by default. [1] https://lists.gnu.org/archive/html/qemu-devel/2019-02/msg00504.html Suggested-by: Dr. David Alan Gilbert Reviewed-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Message-id: 20190221103314.58500-4-sgarzare@redhat.com Message-Id: <20190221103314.58500-4-sgarzare@redhat.com> Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 4 ++++ hw/core/machine.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 7813237756..f7cd322811 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1027,6 +1027,10 @@ static Property virtio_blk_properties[] = { DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 128), DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, IOThread *), + DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, + VIRTIO_BLK_F_DISCARD, true), + DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features, + VIRTIO_BLK_F_WRITE_ZEROES, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/core/machine.c b/hw/core/machine.c index 077fbd182a..766ca5899d 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -33,6 +33,8 @@ GlobalProperty hw_compat_3_1[] = { { "usb-kbd", "serial", "42" }, { "usb-mouse", "serial", "42" }, { "usb-kbd", "serial", "42" }, + { "virtio-blk-device", "discard", "false" }, + { "virtio-blk-device", "write-zeroes", "false" }, }; const size_t hw_compat_3_1_len = G_N_ELEMENTS(hw_compat_3_1); From ba550851f5de642b8a1359e2c5c27cbb5a52ae61 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 Feb 2019 11:33:08 +0100 Subject: [PATCH 21/27] virtio-net: make VirtIOFeature usable for other virtio devices In order to use VirtIOFeature also in other virtio devices, we move its declaration and the endof() macro (renamed in virtio_endof()) in virtio.h. We add virtio_feature_get_config_size() function to iterate the array of VirtIOFeature and to return the config size depending on the features enabled. (as virtio_net_set_config_size() did) Suggested-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Message-id: 20190221103314.58500-5-sgarzare@redhat.com Message-Id: <20190221103314.58500-5-sgarzare@redhat.com> Signed-off-by: Stefan Hajnoczi --- hw/net/virtio-net.c | 31 +++++++------------------------ hw/virtio/virtio.c | 15 +++++++++++++++ include/hw/virtio/virtio.h | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 3f319ef723..6e6b146022 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -82,29 +82,17 @@ static inline __virtio16 *virtio_net_rsc_ext_num_dupacks( #endif -/* - * Calculate the number of bytes up to and including the given 'field' of - * 'container'. - */ -#define endof(container, field) \ - (offsetof(container, field) + sizeof_field(container, field)) - -typedef struct VirtIOFeature { - uint64_t flags; - size_t end; -} VirtIOFeature; - static VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_NET_F_MAC, - .end = endof(struct virtio_net_config, mac)}, + .end = virtio_endof(struct virtio_net_config, mac)}, {.flags = 1ULL << VIRTIO_NET_F_STATUS, - .end = endof(struct virtio_net_config, status)}, + .end = virtio_endof(struct virtio_net_config, status)}, {.flags = 1ULL << VIRTIO_NET_F_MQ, - .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, + .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)}, {.flags = 1ULL << VIRTIO_NET_F_MTU, - .end = endof(struct virtio_net_config, mtu)}, + .end = virtio_endof(struct virtio_net_config, mtu)}, {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, - .end = endof(struct virtio_net_config, duplex)}, + .end = virtio_endof(struct virtio_net_config, duplex)}, {} }; @@ -2580,15 +2568,10 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) { - int i, config_size = 0; virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); - for (i = 0; feature_sizes[i].flags != 0; i++) { - if (host_features & feature_sizes[i].flags) { - config_size = MAX(feature_sizes[i].end, config_size); - } - } - n->config_size = config_size; + n->config_size = virtio_feature_get_config_size(feature_sizes, + host_features); } void virtio_net_set_netclient_name(VirtIONet *n, const char *name, diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index a1ff647a66..2626a895cb 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2036,6 +2036,21 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return ret; } +size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes, + uint64_t host_features) +{ + size_t config_size = 0; + int i; + + for (i = 0; feature_sizes[i].flags != 0; i++) { + if (host_features & feature_sizes[i].flags) { + config_size = MAX(feature_sizes[i].end, config_size); + } + } + + return config_size; +} + int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) { int i, ret; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 9c1fa07d6d..ce9516236a 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -37,6 +37,21 @@ static inline hwaddr vring_align(hwaddr addr, return QEMU_ALIGN_UP(addr, align); } +/* + * Calculate the number of bytes up to and including the given 'field' of + * 'container'. + */ +#define virtio_endof(container, field) \ + (offsetof(container, field) + sizeof_field(container, field)) + +typedef struct VirtIOFeature { + uint64_t flags; + size_t end; +} VirtIOFeature; + +size_t virtio_feature_get_config_size(VirtIOFeature *features, + uint64_t host_features); + typedef struct VirtQueue VirtQueue; #define VIRTQUEUE_MAX_SIZE 1024 From 20764be0421c3d5d97f84219d55a412f3cd2ca9a Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 Feb 2019 11:33:09 +0100 Subject: [PATCH 22/27] virtio-blk: set config size depending on the features enabled Starting from DISABLE and WRITE_ZEROES features, we use an array of VirtIOFeature (as virtio-net) to properly set the config size depending on the features enabled. Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Message-id: 20190221103314.58500-6-sgarzare@redhat.com Message-Id: <20190221103314.58500-6-sgarzare@redhat.com> Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 31 +++++++++++++++++++++++++------ include/hw/virtio/virtio-blk.h | 1 + 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index f7cd322811..8798d13bc4 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -28,9 +28,28 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" -/* We don't support discard yet, hide associated config fields. */ +/* Config size before the discard support (hide associated config fields) */ #define VIRTIO_BLK_CFG_SIZE offsetof(struct virtio_blk_config, \ max_discard_sectors) +/* + * Starting from the discard feature, we can use this array to properly + * set the config size depending on the features enabled. + */ +static VirtIOFeature feature_sizes[] = { + {.flags = 1ULL << VIRTIO_BLK_F_DISCARD, + .end = virtio_endof(struct virtio_blk_config, discard_sector_alignment)}, + {.flags = 1ULL << VIRTIO_BLK_F_WRITE_ZEROES, + .end = virtio_endof(struct virtio_blk_config, write_zeroes_may_unmap)}, + {} +}; + +static void virtio_blk_set_config_size(VirtIOBlock *s, uint64_t host_features) +{ + s->config_size = MAX(VIRTIO_BLK_CFG_SIZE, + virtio_feature_get_config_size(feature_sizes, host_features)); + + assert(s->config_size <= sizeof(struct virtio_blk_config)); +} static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, VirtIOBlockReq *req) @@ -763,8 +782,7 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) blkcfg.alignment_offset = 0; blkcfg.wce = blk_enable_write_cache(s->blk); virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues); - memcpy(config, &blkcfg, VIRTIO_BLK_CFG_SIZE); - QEMU_BUILD_BUG_ON(VIRTIO_BLK_CFG_SIZE > sizeof(blkcfg)); + memcpy(config, &blkcfg, s->config_size); } static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) @@ -772,8 +790,7 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) VirtIOBlock *s = VIRTIO_BLK(vdev); struct virtio_blk_config blkcfg; - memcpy(&blkcfg, config, VIRTIO_BLK_CFG_SIZE); - QEMU_BUILD_BUG_ON(VIRTIO_BLK_CFG_SIZE > sizeof(blkcfg)); + memcpy(&blkcfg, config, s->config_size); aio_context_acquire(blk_get_aio_context(s->blk)); blk_set_enable_write_cache(s->blk, blkcfg.wce != 0); @@ -956,7 +973,9 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, VIRTIO_BLK_CFG_SIZE); + virtio_blk_set_config_size(s, s->host_features); + + virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, s->config_size); s->blk = conf->conf.blk; s->rq = NULL; diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index f7345b0511..7877ae67ae 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -56,6 +56,7 @@ typedef struct VirtIOBlock { bool dataplane_started; struct VirtIOBlockDataPlane *dataplane; uint64_t host_features; + size_t config_size; } VirtIOBlock; typedef struct VirtIOBlockReq { From 37b06f8d46fe602e630e4bdce24e80a3e0f70cc2 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 Feb 2019 11:33:10 +0100 Subject: [PATCH 23/27] virtio-blk: add DISCARD and WRITE_ZEROES features This patch adds the support of DISCARD and WRITE_ZEROES commands, that have been introduced in the virtio-blk protocol to have better performance when using SSD backend. We support only one segment per request since multiple segments are not widely used and there are no userspace APIs that allow applications to submit multiple segments in a single call. Reviewed-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Message-id: 20190221103314.58500-7-sgarzare@redhat.com Message-Id: <20190221103314.58500-7-sgarzare@redhat.com> Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 184 +++++++++++++++++++++++++++++++++ include/hw/virtio/virtio-blk.h | 2 + 2 files changed, 186 insertions(+) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 8798d13bc4..c159a3d5f7 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -169,6 +169,30 @@ out: aio_context_release(blk_get_aio_context(s->conf.conf.blk)); } +static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) +{ + VirtIOBlockReq *req = opaque; + VirtIOBlock *s = req->dev; + bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) & + ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES; + + aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + if (ret) { + if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { + goto out; + } + } + + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + if (is_write_zeroes) { + block_acct_done(blk_get_stats(s->blk), &req->acct); + } + virtio_blk_free_request(req); + +out: + aio_context_release(blk_get_aio_context(s->conf.conf.blk)); +} + #ifdef __linux__ typedef struct { @@ -502,6 +526,84 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev, return true; } +static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req, + struct virtio_blk_discard_write_zeroes *dwz_hdr, bool is_write_zeroes) +{ + VirtIOBlock *s = req->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + uint64_t sector; + uint32_t num_sectors, flags, max_sectors; + uint8_t err_status; + int bytes; + + sector = virtio_ldq_p(vdev, &dwz_hdr->sector); + num_sectors = virtio_ldl_p(vdev, &dwz_hdr->num_sectors); + flags = virtio_ldl_p(vdev, &dwz_hdr->flags); + max_sectors = is_write_zeroes ? s->conf.max_write_zeroes_sectors : + s->conf.max_discard_sectors; + + /* + * max_sectors is at most BDRV_REQUEST_MAX_SECTORS, this check + * make us sure that "num_sectors << BDRV_SECTOR_BITS" can fit in + * the integer variable. + */ + if (unlikely(num_sectors > max_sectors)) { + err_status = VIRTIO_BLK_S_IOERR; + goto err; + } + + bytes = num_sectors << BDRV_SECTOR_BITS; + + if (unlikely(!virtio_blk_sect_range_ok(s, sector, bytes))) { + err_status = VIRTIO_BLK_S_IOERR; + goto err; + } + + /* + * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard + * and write zeroes commands if any unknown flag is set. + */ + if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { + err_status = VIRTIO_BLK_S_UNSUPP; + goto err; + } + + if (is_write_zeroes) { /* VIRTIO_BLK_T_WRITE_ZEROES */ + int blk_aio_flags = 0; + + if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { + blk_aio_flags |= BDRV_REQ_MAY_UNMAP; + } + + block_acct_start(blk_get_stats(s->blk), &req->acct, bytes, + BLOCK_ACCT_WRITE); + + blk_aio_pwrite_zeroes(s->blk, sector << BDRV_SECTOR_BITS, + bytes, blk_aio_flags, + virtio_blk_discard_write_zeroes_complete, req); + } else { /* VIRTIO_BLK_T_DISCARD */ + /* + * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for + * discard commands if the unmap flag is set. + */ + if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { + err_status = VIRTIO_BLK_S_UNSUPP; + goto err; + } + + blk_aio_pdiscard(s->blk, sector << BDRV_SECTOR_BITS, bytes, + virtio_blk_discard_write_zeroes_complete, req); + } + + return VIRTIO_BLK_S_OK; + +err: + if (is_write_zeroes) { + block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE); + } + return err_status; +} + static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) { uint32_t type; @@ -603,6 +705,47 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) virtio_blk_free_request(req); break; } + /* + * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with + * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement, + * so we must mask it for these requests, then we will check if it is set. + */ + case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT: + case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT: + { + struct virtio_blk_discard_write_zeroes dwz_hdr; + size_t out_len = iov_size(out_iov, out_num); + bool is_write_zeroes = (type & ~VIRTIO_BLK_T_BARRIER) == + VIRTIO_BLK_T_WRITE_ZEROES; + uint8_t err_status; + + /* + * Unsupported if VIRTIO_BLK_T_OUT is not set or the request contains + * more than one segment. + */ + if (unlikely(!(type & VIRTIO_BLK_T_OUT) || + out_len > sizeof(dwz_hdr))) { + virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); + virtio_blk_free_request(req); + return 0; + } + + if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr, + sizeof(dwz_hdr)) != sizeof(dwz_hdr))) { + virtio_error(vdev, "virtio-blk discard/write_zeroes header" + " too short"); + return -1; + } + + err_status = virtio_blk_handle_discard_write_zeroes(req, &dwz_hdr, + is_write_zeroes); + if (err_status != VIRTIO_BLK_S_OK) { + virtio_blk_req_complete(req, err_status); + virtio_blk_free_request(req); + } + + break; + } default: virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); virtio_blk_free_request(req); @@ -782,6 +925,24 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) blkcfg.alignment_offset = 0; blkcfg.wce = blk_enable_write_cache(s->blk); virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues); + if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD)) { + virtio_stl_p(vdev, &blkcfg.max_discard_sectors, + s->conf.max_discard_sectors); + virtio_stl_p(vdev, &blkcfg.discard_sector_alignment, + blk_size >> BDRV_SECTOR_BITS); + /* + * We support only one segment per request since multiple segments + * are not widely used and there are no userspace APIs that allow + * applications to submit multiple segments in a single call. + */ + virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1); + } + if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES)) { + virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors, + s->conf.max_write_zeroes_sectors); + blkcfg.write_zeroes_may_unmap = 1; + virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1); + } memcpy(config, &blkcfg, s->config_size); } @@ -973,6 +1134,25 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } + if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) && + (!conf->max_discard_sectors || + conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) { + error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")" + ", must be between 1 and %d", + conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS); + return; + } + + if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) && + (!conf->max_write_zeroes_sectors || + conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) { + error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32 + "), must be between 1 and %d", + conf->max_write_zeroes_sectors, + (int)BDRV_REQUEST_MAX_SECTORS); + return; + } + virtio_blk_set_config_size(s, s->host_features); virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, s->config_size); @@ -1050,6 +1230,10 @@ static Property virtio_blk_properties[] = { VIRTIO_BLK_F_DISCARD, true), DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features, VIRTIO_BLK_F_WRITE_ZEROES, true), + DEFINE_PROP_UINT32("max-discard-sectors", VirtIOBlock, + conf.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS), + DEFINE_PROP_UINT32("max-write-zeroes-sectors", VirtIOBlock, + conf.max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 7877ae67ae..cddcfbebe9 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -38,6 +38,8 @@ struct VirtIOBlkConf uint32_t request_merging; uint16_t num_queues; uint16_t queue_size; + uint32_t max_discard_sectors; + uint32_t max_write_zeroes_sectors; }; struct VirtIOBlockDataPlane; From f6cd8a6366d6bad4a557616a5f70c6d2e374b8c1 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 Feb 2019 11:33:11 +0100 Subject: [PATCH 24/27] tests/virtio-blk: change assert on data_size in virtio_blk_request() The size of data in the virtio_blk_request must be a multiple of 512 bytes for IN and OUT requests, or a multiple of the size of struct virtio_blk_discard_write_zeroes for DISCARD and WRITE_ZEROES requests. Reviewed-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Reviewed-by: Thomas Huth Signed-off-by: Stefano Garzarella Message-id: 20190221103314.58500-8-sgarzare@redhat.com Message-Id: <20190221103314.58500-8-sgarzare@redhat.com> Signed-off-by: Stefan Hajnoczi --- tests/virtio-blk-test.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 04c608764b..0739498da7 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -144,7 +144,20 @@ static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioDevice *d, uint64_t addr; uint8_t status = 0xFF; - g_assert_cmpuint(data_size % 512, ==, 0); + switch (req->type) { + case VIRTIO_BLK_T_IN: + case VIRTIO_BLK_T_OUT: + g_assert_cmpuint(data_size % 512, ==, 0); + break; + case VIRTIO_BLK_T_DISCARD: + case VIRTIO_BLK_T_WRITE_ZEROES: + g_assert_cmpuint(data_size % + sizeof(struct virtio_blk_discard_write_zeroes), ==, 0); + break; + default: + g_assert_cmpuint(data_size, ==, 0); + } + addr = guest_alloc(alloc, sizeof(*req) + data_size); virtio_blk_fix_request(d, req); From ca1a98042bad3fe749c5d3d882f3bd5e956b7690 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 Feb 2019 11:33:12 +0100 Subject: [PATCH 25/27] tests/virtio-blk: add virtio_blk_fix_dwz_hdr() function This function is useful to fix the endianness of struct virtio_blk_discard_write_zeroes headers. Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Message-id: 20190221103314.58500-9-sgarzare@redhat.com Message-Id: <20190221103314.58500-9-sgarzare@redhat.com> Signed-off-by: Stefan Hajnoczi --- tests/virtio-blk-test.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 0739498da7..85b6b99adc 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -46,6 +46,12 @@ typedef struct QVirtioBlkReq { uint8_t status; } QVirtioBlkReq; +#ifdef HOST_WORDS_BIGENDIAN +const bool host_is_big_endian = true; +#else +const bool host_is_big_endian; /* false */ +#endif + static char *drive_create(void) { int fd, ret; @@ -125,12 +131,6 @@ static QVirtioPCIDevice *virtio_blk_pci_init(QPCIBus *bus, int slot) static inline void virtio_blk_fix_request(QVirtioDevice *d, QVirtioBlkReq *req) { -#ifdef HOST_WORDS_BIGENDIAN - const bool host_is_big_endian = true; -#else - const bool host_is_big_endian = false; -#endif - if (qvirtio_is_big_endian(d) != host_is_big_endian) { req->type = bswap32(req->type); req->ioprio = bswap32(req->ioprio); @@ -138,6 +138,17 @@ static inline void virtio_blk_fix_request(QVirtioDevice *d, QVirtioBlkReq *req) } } + +static inline void virtio_blk_fix_dwz_hdr(QVirtioDevice *d, + struct virtio_blk_discard_write_zeroes *dwz_hdr) +{ + if (qvirtio_is_big_endian(d) != host_is_big_endian) { + dwz_hdr->sector = bswap64(dwz_hdr->sector); + dwz_hdr->num_sectors = bswap32(dwz_hdr->num_sectors); + dwz_hdr->flags = bswap32(dwz_hdr->flags); + } +} + static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioDevice *d, QVirtioBlkReq *req, uint64_t data_size) { From 06879094bb7a50dd5ee503f3f0e208e0cd91dbc3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 Feb 2019 11:33:13 +0100 Subject: [PATCH 26/27] tests/virtio-blk: add test for WRITE_ZEROES command If the WRITE_ZEROES feature is enabled, we check this command in the test_basic(). Reviewed-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Acked-by: Thomas Huth Signed-off-by: Stefano Garzarella Message-id: 20190221103314.58500-10-sgarzare@redhat.com Message-Id: <20190221103314.58500-10-sgarzare@redhat.com> Signed-off-by: Stefan Hajnoczi --- tests/virtio-blk-test.c | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 85b6b99adc..f43d971099 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -255,6 +255,68 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, guest_free(alloc, req_addr); + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { + struct virtio_blk_discard_write_zeroes dwz_hdr; + void *expected; + + /* + * WRITE_ZEROES request on the same sector of previous test where + * we wrote "TEST". + */ + req.type = VIRTIO_BLK_T_WRITE_ZEROES; + req.data = (char *) &dwz_hdr; + dwz_hdr.sector = 0; + dwz_hdr.num_sectors = 1; + dwz_hdr.flags = 0; + + virtio_blk_fix_dwz_hdr(dev, &dwz_hdr); + + req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr)); + + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, sizeof(dwz_hdr), false, true); + qvirtqueue_add(vq, req_addr + 16 + sizeof(dwz_hdr), 1, true, false); + + qvirtqueue_kick(dev, vq, free_head); + + qvirtio_wait_used_elem(dev, vq, free_head, NULL, + QVIRTIO_BLK_TIMEOUT_US); + status = readb(req_addr + 16 + sizeof(dwz_hdr)); + g_assert_cmpint(status, ==, 0); + + guest_free(alloc, req_addr); + + /* Read request to check if the sector contains all zeroes */ + req.type = VIRTIO_BLK_T_IN; + req.ioprio = 1; + req.sector = 0; + req.data = g_malloc0(512); + + req_addr = virtio_blk_request(alloc, dev, &req, 512); + + g_free(req.data); + + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, 512, true, true); + qvirtqueue_add(vq, req_addr + 528, 1, true, false); + + qvirtqueue_kick(dev, vq, free_head); + + qvirtio_wait_used_elem(dev, vq, free_head, NULL, + QVIRTIO_BLK_TIMEOUT_US); + status = readb(req_addr + 528); + g_assert_cmpint(status, ==, 0); + + data = g_malloc(512); + expected = g_malloc0(512); + memread(req_addr + 16, data, 512); + g_assert_cmpmem(data, 512, expected, 512); + g_free(expected); + g_free(data); + + guest_free(alloc, req_addr); + } + if (features & (1u << VIRTIO_F_ANY_LAYOUT)) { /* Write and read with 2 descriptor layout */ /* Write request */ From 9a9f4b74fa547b68edb38fa414999836770a4735 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 Feb 2019 11:33:14 +0100 Subject: [PATCH 27/27] tests/virtio-blk: add test for DISCARD command If the DISCARD feature is enabled, we try this command in the test_basic(), checking only the status returned by the request. Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Message-id: 20190221103314.58500-11-sgarzare@redhat.com Message-Id: <20190221103314.58500-11-sgarzare@redhat.com> Signed-off-by: Stefan Hajnoczi --- tests/virtio-blk-test.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index f43d971099..8d2fc9c710 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -317,6 +317,33 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, guest_free(alloc, req_addr); } + if (features & (1u << VIRTIO_BLK_F_DISCARD)) { + struct virtio_blk_discard_write_zeroes dwz_hdr; + + req.type = VIRTIO_BLK_T_DISCARD; + req.data = (char *) &dwz_hdr; + dwz_hdr.sector = 0; + dwz_hdr.num_sectors = 1; + dwz_hdr.flags = 0; + + virtio_blk_fix_dwz_hdr(dev, &dwz_hdr); + + req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr)); + + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, sizeof(dwz_hdr), false, true); + qvirtqueue_add(vq, req_addr + 16 + sizeof(dwz_hdr), 1, true, false); + + qvirtqueue_kick(dev, vq, free_head); + + qvirtio_wait_used_elem(dev, vq, free_head, NULL, + QVIRTIO_BLK_TIMEOUT_US); + status = readb(req_addr + 16 + sizeof(dwz_hdr)); + g_assert_cmpint(status, ==, 0); + + guest_free(alloc, req_addr); + } + if (features & (1u << VIRTIO_F_ANY_LAYOUT)) { /* Write and read with 2 descriptor layout */ /* Write request */