qcow2: Avoid bounce buffers for AIO read requests

qcow2 used to use bounce buffers for all AIO requests. This implies not only
unnecessary copying, but also unbounded allocations, which should be avoided.

This patch removes bounce buffers from the normal AIO read path, and constrains
them to a constant size for encrypted images.
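
To illustrate the idea outside of QEMU: a minimal sketch in plain POSIX C
(not QEMU's block layer; read_bounced() and read_direct() are made-up names
for this example only) of the two ways to serve a vectored read:

    #include <stdlib.h>
    #include <string.h>
    #include <sys/uio.h>
    #include <unistd.h>

    /* Bounce-buffer style: allocate a temporary buffer covering the whole
     * request, read into it, then copy out. The allocation grows with the
     * request, i.e. it is unbounded. */
    static ssize_t read_bounced(int fd, struct iovec *iov, int iovcnt,
                                off_t off)
    {
        size_t total = 0, pos = 0;
        for (int i = 0; i < iovcnt; i++) {
            total += iov[i].iov_len;
        }

        char *bounce = malloc(total);       /* unbounded allocation */
        if (!bounce) {
            return -1;
        }

        ssize_t ret = pread(fd, bounce, total, off);

        /* Extra copy from the bounce buffer into the caller's vectors. */
        for (int i = 0; ret > 0 && i < iovcnt; i++) {
            memcpy(iov[i].iov_base, bounce + pos, iov[i].iov_len);
            pos += iov[i].iov_len;
        }

        free(bounce);
        return ret;
    }

    /* Zero-copy style: hand the caller's scatter/gather list straight to
     * the lower layer, with no allocation and no copy. */
    static ssize_t read_direct(int fd, struct iovec *iov, int iovcnt,
                               off_t off)
    {
        return preadv(fd, iov, iovcnt, off);
    }

The first variant is what the old code did in spirit: one allocation sized by
the guest's request plus an extra memcpy(). The second is what this patch
moves the read path to, passing the caller's QEMUIOVector down the stack.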

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Author: Kevin Wolf <kwolf@redhat.com>
Date:   2010-09-13 18:08:52 +02:00
Commit: bd28f83565 (parent b8a83a4f79)

3 changed files with 68 additions and 30 deletions

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c

@@ -350,6 +350,8 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
     BDRVQcowState *s = bs->opaque;
     int ret, index_in_cluster, n, n1;
     uint64_t cluster_offset;
+    struct iovec iov;
+    QEMUIOVector qiov;
 
     while (nb_sectors > 0) {
         n = nb_sectors;
@@ -364,7 +366,11 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
         if (!cluster_offset) {
             if (bs->backing_hd) {
                 /* read from the base image */
-                n1 = qcow2_backing_read1(bs->backing_hd, sector_num, buf, n);
+                iov.iov_base = buf;
+                iov.iov_len = n * 512;
+                qemu_iovec_init_external(&qiov, &iov, 1);
+
+                n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
                 if (n1 > 0) {
                     BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
                     ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);

diff --git a/block/qcow2.c b/block/qcow2.c
--- a/block/qcow2.c
+++ b/block/qcow2.c

@@ -311,8 +311,8 @@ static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
 }
 
 /* handle reading after the end of the backing file */
-int qcow2_backing_read1(BlockDriverState *bs,
-                  int64_t sector_num, uint8_t *buf, int nb_sectors)
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+                  int64_t sector_num, int nb_sectors)
 {
     int n1;
     if ((sector_num + nb_sectors) <= bs->total_sectors)
@@ -321,7 +321,9 @@ int qcow2_backing_read1(BlockDriverState *bs,
         n1 = 0;
     else
         n1 = bs->total_sectors - sector_num;
-    memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1));
+
+    qemu_iovec_memset(qiov, 0, 512 * (nb_sectors - n1));
+
     return n1;
 }
@@ -333,6 +335,7 @@ typedef struct QCowAIOCB {
     void *orig_buf;
     int remaining_sectors;
     int cur_nr_sectors;   /* number of sectors in current iteration */
+    uint64_t bytes_done;
     uint64_t cluster_offset;
     uint8_t *cluster_data;
     BlockDriverAIOCB *hd_aiocb;
@@ -397,15 +400,19 @@ static void qcow_aio_read_cb(void *opaque, int ret)
         /* nothing to do */
     } else {
         if (s->crypt_method) {
-            qcow2_encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
-                            acb->cur_nr_sectors, 0,
-                            &s->aes_decrypt_key);
+            qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data,
+                acb->cluster_data, acb->cur_nr_sectors, 0, &s->aes_decrypt_key);
+
+            qemu_iovec_reset(&acb->hd_qiov);
+            qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
+                acb->cur_nr_sectors * 512);
+
+            qemu_iovec_from_buffer(&acb->hd_qiov, acb->cluster_data,
+                512 * acb->cur_nr_sectors);
         }
     }
 
     acb->remaining_sectors -= acb->cur_nr_sectors;
     acb->sector_num += acb->cur_nr_sectors;
-    acb->buf += acb->cur_nr_sectors * 512;
+    acb->bytes_done += acb->cur_nr_sectors * 512;
 
     if (acb->remaining_sectors == 0) {
         /* request completed */
@@ -415,6 +422,11 @@ static void qcow_aio_read_cb(void *opaque, int ret)
     /* prepare next AIO request */
     acb->cur_nr_sectors = acb->remaining_sectors;
+    if (s->crypt_method) {
+        acb->cur_nr_sectors = MIN(acb->cur_nr_sectors,
+            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+    }
+
     ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
         &acb->cur_nr_sectors, &acb->cluster_offset);
     if (ret < 0) {
@@ -423,15 +435,17 @@ static void qcow_aio_read_cb(void *opaque, int ret)
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
 
+    qemu_iovec_reset(&acb->hd_qiov);
+    qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
+        acb->cur_nr_sectors * 512);
+
     if (!acb->cluster_offset) {
         if (bs->backing_hd) {
             /* read from the base image */
-            n1 = qcow2_backing_read1(bs->backing_hd, acb->sector_num,
-                             acb->buf, acb->cur_nr_sectors);
+            n1 = qcow2_backing_read1(bs->backing_hd, &acb->hd_qiov,
+                acb->sector_num, acb->cur_nr_sectors);
             if (n1 > 0) {
-                acb->hd_iov.iov_base = (void *)acb->buf;
-                acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
-                qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
                 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                 acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
                     &acb->hd_qiov, acb->cur_nr_sectors,
@@ -445,7 +459,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
             }
         } else {
             /* Note: in this case, no need to wait */
-            memset(acb->buf, 0, 512 * acb->cur_nr_sectors);
+            qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
             ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
             if (ret < 0)
                 goto done;
@@ -454,8 +468,11 @@ static void qcow_aio_read_cb(void *opaque, int ret)
         /* add AIO support for compressed blocks ? */
         if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0)
             goto done;
-        memcpy(acb->buf, s->cluster_cache + index_in_cluster * 512,
+
+        qemu_iovec_from_buffer(&acb->hd_qiov,
+            s->cluster_cache + index_in_cluster * 512,
             512 * acb->cur_nr_sectors);
+
         ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
         if (ret < 0)
             goto done;
@@ -465,9 +482,23 @@ static void qcow_aio_read_cb(void *opaque, int ret)
             goto done;
         }
 
-        acb->hd_iov.iov_base = (void *)acb->buf;
-        acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
-        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+        if (s->crypt_method) {
+            /*
+             * For encrypted images, read everything into a temporary
+             * contiguous buffer on which the AES functions can work.
+             */
+            if (!acb->cluster_data) {
+                acb->cluster_data =
+                    qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+            }
+
+            assert(acb->cur_nr_sectors <=
+                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+            qemu_iovec_reset(&acb->hd_qiov);
+            qemu_iovec_add(&acb->hd_qiov, acb->cluster_data,
+                512 * acb->cur_nr_sectors);
+        }
 
         BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
         acb->hd_aiocb = bdrv_aio_readv(bs->file,
             (acb->cluster_offset >> 9) + index_in_cluster,
@@ -481,11 +512,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
     return;
 
 done:
-    if (acb->qiov->niov > 1) {
-        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
-        qemu_vfree(acb->orig_buf);
-    }
     acb->common.cb(acb->common.opaque, ret);
+    qemu_iovec_destroy(&acb->hd_qiov);
     qemu_aio_release(acb);
 }
@@ -501,13 +529,17 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
     acb->qiov = qiov;
-    if (qiov->niov > 1) {
-        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
-        if (is_write)
-            qemu_iovec_to_buffer(qiov, acb->buf);
+
+    if (!is_write) {
+        qemu_iovec_init(&acb->hd_qiov, qiov->niov);
+    } else if (qiov->niov == 1) {
+        acb->buf = (uint8_t *)qiov->iov->iov_base;
     } else {
-        acb->buf = (uint8_t *)qiov->iov->iov_base;
+        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
+        qemu_iovec_to_buffer(qiov, acb->buf);
     }
+
+    acb->bytes_done = 0;
     acb->remaining_sectors = nb_sectors;
     acb->cur_nr_sectors = 0;
     acb->cluster_offset = 0;

diff --git a/block/qcow2.h b/block/qcow2.h
--- a/block/qcow2.h
+++ b/block/qcow2.h

@@ -166,8 +166,8 @@ static inline int64_t align_offset(int64_t offset, int n)
 // FIXME Need qcow2_ prefix to global functions
 
 /* qcow2.c functions */
-int qcow2_backing_read1(BlockDriverState *bs,
-                  int64_t sector_num, uint8_t *buf, int nb_sectors);
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+                  int64_t sector_num, int nb_sectors);
 
 /* qcow2-refcount.c functions */
 int qcow2_refcount_init(BlockDriverState *bs);
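
A footnote on the encrypted path: the patch keeps a bounce buffer there,
because the AES routines need a contiguous buffer to work on, but it caps the
buffer at QCOW_MAX_CRYPT_CLUSTERS clusters and splits larger requests. A
self-contained sketch of that pattern, with a toy XOR cipher standing in for
the real qcow2_encrypt_sectors() and the hypothetical MAX_CRYPT_BYTES playing
the role of QCOW_MAX_CRYPT_CLUSTERS * cluster_size:

    #include <assert.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    /* Hypothetical cap on the bounce buffer size. */
    #define MAX_CRYPT_BYTES (8 * 65536)

    /* Toy stand-in for the real AES routine; the point is only that the
     * cipher wants one contiguous buffer. */
    static void decrypt_in_place(unsigned char *buf, size_t len)
    {
        for (size_t i = 0; i < len; i++) {
            buf[i] ^= 0xAA;
        }
    }

    /* Read and decrypt an arbitrarily large request through a fixed-size,
     * lazily allocated bounce buffer by splitting it into capped chunks.
     * (Short reads are treated as errors to keep the sketch small.) */
    static int crypt_read(int fd, unsigned char **bounce,
                          unsigned char *dst, size_t len, off_t off)
    {
        if (!*bounce) {
            *bounce = malloc(MAX_CRYPT_BYTES);  /* bounded, reused later */
            if (!*bounce) {
                return -1;
            }
        }

        while (len > 0) {
            size_t chunk = len < MAX_CRYPT_BYTES ? len : MAX_CRYPT_BYTES;
            assert(chunk <= MAX_CRYPT_BYTES);

            if (pread(fd, *bounce, chunk, off) != (ssize_t)chunk) {
                return -1;
            }
            decrypt_in_place(*bounce, chunk);
            memcpy(dst, *bounce, chunk);  /* one copy, but bounded memory */

            dst += chunk;
            off += chunk;
            len -= chunk;
        }
        return 0;
    }

This mirrors the MIN() clamp on acb->cur_nr_sectors and the one-time
qemu_mallocz() of acb->cluster_data above: the copy remains, but the
allocation no longer grows with the guest request.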