From 5f81724d80a1492c73d329242663962139db739b Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 17 Nov 2015 14:59:52 -0500 Subject: [PATCH 1/6] ide/atapi: make PIO read requests async PIO read requests on the ATAPI interface used to be sync blk requests. This has two significant drawbacks. First the main loop hangs util an I/O request is completed and secondly if the I/O request does not complete (e.g. due to an unresponsive storage) Qemu hangs completely. Note: Due to possible race conditions requests during an ongoing elementary transfer are still sync. Signed-off-by: Peter Lieven Reviewed-by: John Snow Message-id: 1447345846-15624-2-git-send-email-pl@kamp.de Signed-off-by: John Snow --- hw/ide/atapi.c | 95 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 82 insertions(+), 13 deletions(-) diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index ed8bb2c80a..eb0d964215 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -105,20 +105,27 @@ static void cd_data_to_raw(uint8_t *buf, int lba) memset(buf, 0, 288); } -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size) +static int +cd_read_sector_sync(IDEState *s) { int ret; block_acct_start(blk_get_stats(s->blk), &s->acct, 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); - switch(sector_size) { +#ifdef DEBUG_IDE_ATAPI + printf("cd_read_sector_sync: lba=%d\n", s->lba); +#endif + + switch (s->cd_sector_size) { case 2048: - ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4); + ret = blk_read(s->blk, (int64_t)s->lba << 2, + s->io_buffer, 4); break; case 2352: - ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4); + ret = blk_read(s->blk, (int64_t)s->lba << 2, + s->io_buffer + 16, 4); if (ret >= 0) { - cd_data_to_raw(buf, lba); + cd_data_to_raw(s->io_buffer, s->lba); } break; default: @@ -130,11 +137,65 @@ static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size) block_acct_failed(blk_get_stats(s->blk), &s->acct); } else { block_acct_done(blk_get_stats(s->blk), &s->acct); + s->lba++; + s->io_buffer_index = 0; } return ret; } +static void cd_read_sector_cb(void *opaque, int ret) +{ + IDEState *s = opaque; + + block_acct_done(blk_get_stats(s->blk), &s->acct); + +#ifdef DEBUG_IDE_ATAPI + printf("cd_read_sector_cb: lba=%d ret=%d\n", s->lba, ret); +#endif + + if (ret < 0) { + ide_atapi_io_error(s, ret); + return; + } + + if (s->cd_sector_size == 2352) { + cd_data_to_raw(s->io_buffer, s->lba); + } + + s->lba++; + s->io_buffer_index = 0; + s->status &= ~BUSY_STAT; + + ide_atapi_cmd_reply_end(s); +} + +static int cd_read_sector(IDEState *s) +{ + if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) { + return -EINVAL; + } + + s->iov.iov_base = (s->cd_sector_size == 2352) ? + s->io_buffer + 16 : s->io_buffer; + + s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; + qemu_iovec_init_external(&s->qiov, &s->iov, 1); + +#ifdef DEBUG_IDE_ATAPI + printf("cd_read_sector: lba=%d\n", s->lba); +#endif + + block_acct_start(blk_get_stats(s->blk), &s->acct, + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); + + blk_aio_readv(s->blk, (int64_t)s->lba << 2, &s->qiov, 4, + cd_read_sector_cb, s); + + s->status |= BUSY_STAT; + return 0; +} + void ide_atapi_cmd_ok(IDEState *s) { s->error = 0; @@ -196,18 +257,27 @@ void ide_atapi_cmd_reply_end(IDEState *s) ide_atapi_cmd_ok(s); ide_set_irq(s->bus); #ifdef DEBUG_IDE_ATAPI - printf("status=0x%x\n", s->status); + printf("end of transfer, status=0x%x\n", s->status); #endif } else { /* see if a new sector must be read */ if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) { - ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size); - if (ret < 0) { - ide_atapi_io_error(s, ret); + if (!s->elementary_transfer_size) { + ret = cd_read_sector(s); + if (ret < 0) { + ide_atapi_io_error(s, ret); + } return; + } else { + /* rebuffering within an elementary transfer is + * only possible with a sync request because we + * end up with a race condition otherwise */ + ret = cd_read_sector_sync(s); + if (ret < 0) { + ide_atapi_io_error(s, ret); + return; + } } - s->lba++; - s->io_buffer_index = 0; } if (s->elementary_transfer_size > 0) { /* there are some data left to transmit in this elementary @@ -287,7 +357,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors, s->io_buffer_index = sector_size; s->cd_sector_size = sector_size; - s->status = READY_STAT | SEEK_STAT; ide_atapi_cmd_reply_end(s); } @@ -372,7 +441,7 @@ eot: if (ret < 0) { block_acct_failed(blk_get_stats(s->blk), &s->acct); } else { - block_acct_done(blk_get_stats(s->blk), &s->acct); + block_acct_done(blk_get_stats(s->blk), &s->acct); } ide_set_inactive(s, false); } From ca78ecfa72f311cd647b12a41d93e1ce54f18e66 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 17 Nov 2015 15:06:21 -0500 Subject: [PATCH 2/6] block: add blk_abort_aio_request Signed-off-by: Peter Lieven Reviewed-by: Fam Zheng Message-id: 1447345846-15624-3-git-send-email-pl@kamp.de Signed-off-by: John Snow --- block/block-backend.c | 17 +++++++++-------- include/sysemu/block-backend.h | 3 +++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 9889e813b6..36ccc9e616 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -642,8 +642,9 @@ static void error_callback_bh(void *opaque) qemu_aio_unref(acb); } -static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb, - void *opaque, int ret) +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret) { struct BlockBackendAIOCB *acb; QEMUBH *bh; @@ -665,7 +666,7 @@ BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags, @@ -725,7 +726,7 @@ BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque); @@ -737,7 +738,7 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque); @@ -747,7 +748,7 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { if (!blk_is_available(blk)) { - return abort_aio_request(blk, cb, opaque, -ENOMEDIUM); + return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); } return bdrv_aio_flush(blk->bs, cb, opaque); @@ -759,7 +760,7 @@ BlockAIOCB *blk_aio_discard(BlockBackend *blk, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque); @@ -802,7 +803,7 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { if (!blk_is_available(blk)) { - return abort_aio_request(blk, cb, opaque, -ENOMEDIUM); + return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); } return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque); diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index f4a68e291b..fb068ea47b 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -184,5 +184,8 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret); #endif From 1d8c11d631545ee43aff16b0763aff7181b61f20 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 17 Nov 2015 15:06:25 -0500 Subject: [PATCH 3/6] ide: add support for IDEBufferedRequest this patch adds a new aio readv compatible function which copies all data through a bounce buffer. These buffered requests can be flagged as orphaned which means that their original callback has already been invoked and the request has just not been completed by the backend storage. The bounce buffer guarantees that guest memory corruption is avoided when such a orphaned request is completed by the backend at a later stage. This trick only works for read requests as a write request completed at a later stage might corrupt data as there is no way to control if and what data has already been written to the storage. Signed-off-by: Peter Lieven Reviewed-by: Fam Zheng Message-id: 1447345846-15624-4-git-send-email-pl@kamp.de Signed-off-by: John Snow --- hw/ide/core.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ hw/ide/internal.h | 14 ++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/hw/ide/core.c b/hw/ide/core.c index 2725dd3b81..1470737e34 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -561,6 +561,53 @@ static bool ide_sect_range_ok(IDEState *s, return true; } +static void ide_buffered_readv_cb(void *opaque, int ret) +{ + IDEBufferedRequest *req = opaque; + if (!req->orphaned) { + if (!ret) { + qemu_iovec_from_buf(req->original_qiov, 0, req->iov.iov_base, + req->original_qiov->size); + } + req->original_cb(req->original_opaque, ret); + } + QLIST_REMOVE(req, list); + qemu_vfree(req->iov.iov_base); + g_free(req); +} + +#define MAX_BUFFERED_REQS 16 + +BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque) +{ + BlockAIOCB *aioreq; + IDEBufferedRequest *req; + int c = 0; + + QLIST_FOREACH(req, &s->buffered_requests, list) { + c++; + } + if (c > MAX_BUFFERED_REQS) { + return blk_abort_aio_request(s->blk, cb, opaque, -EIO); + } + + req = g_new0(IDEBufferedRequest, 1); + req->original_qiov = iov; + req->original_cb = cb; + req->original_opaque = opaque; + req->iov.iov_base = qemu_blockalign(blk_bs(s->blk), iov->size); + req->iov.iov_len = iov->size; + qemu_iovec_init_external(&req->qiov, &req->iov, 1); + + aioreq = blk_aio_readv(s->blk, sector_num, &req->qiov, nb_sectors, + ide_buffered_readv_cb, req); + + QLIST_INSERT_HEAD(&s->buffered_requests, req, list); + return aioreq; +} + static void ide_sector_read(IDEState *s); static void ide_sector_read_cb(void *opaque, int ret) diff --git a/hw/ide/internal.h b/hw/ide/internal.h index e4629b023a..2d1e2d2d2f 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -343,6 +343,16 @@ enum ide_dma_cmd { #define ide_cmd_is_read(s) \ ((s)->dma_cmd == IDE_DMA_READ) +typedef struct IDEBufferedRequest { + QLIST_ENTRY(IDEBufferedRequest) list; + struct iovec iov; + QEMUIOVector qiov; + QEMUIOVector *original_qiov; + BlockCompletionFunc *original_cb; + void *original_opaque; + bool orphaned; +} IDEBufferedRequest; + /* NOTE: IDEState represents in fact one drive */ struct IDEState { IDEBus *bus; @@ -396,6 +406,7 @@ struct IDEState { BlockAIOCB *pio_aiocb; struct iovec iov; QEMUIOVector qiov; + QLIST_HEAD(, IDEBufferedRequest) buffered_requests; /* ATA DMA state */ uint64_t io_buffer_offset; int32_t io_buffer_size; @@ -572,6 +583,9 @@ void ide_set_inactive(IDEState *s, bool more); BlockAIOCB *ide_issue_trim(BlockBackend *blk, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque); /* hw/ide/atapi.c */ void ide_atapi_cmd(IDEState *s); From 7cda62087c0baf064486f3d803184c2c3b35c04a Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 17 Nov 2015 15:06:29 -0500 Subject: [PATCH 4/6] ide: orphan all buffered requests on DMA cancel If the guests canceles a DMA request we can prematurely invoke all callbacks of buffered requests and flag all them as orphaned. Ideally this avoids the need for draining all requests. For CDROM devices this works in 100% of all cases. Signed-off-by: Peter Lieven Reviewed-by: Fam Zheng Message-id: 1447345846-15624-5-git-send-email-pl@kamp.de Signed-off-by: John Snow --- hw/ide/pci.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 9c54b378d6..37dbc291da 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -233,6 +233,22 @@ void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val) /* Ignore writes to SSBM if it keeps the old value */ if ((val & BM_CMD_START) != (bm->cmd & BM_CMD_START)) { if (!(val & BM_CMD_START)) { + /* First invoke the callbacks of all buffered requests + * and flag those requests as orphaned. Ideally there + * are no unbuffered (Scatter Gather DMA Requests or + * write requests) pending and we can avoid to drain. */ + IDEBufferedRequest *req; + IDEState *s = idebus_active_if(bm->bus); + QLIST_FOREACH(req, &s->buffered_requests, list) { + if (!req->orphaned) { +#ifdef DEBUG_IDE + printf("%s: invoking cb %p of buffered request %p with" + " -ECANCELED\n", __func__, req->original_cb, req); +#endif + req->original_cb(req->original_opaque, -ECANCELED); + } + req->orphaned = true; + } /* * We can't cancel Scatter Gather DMA in the middle of the * operation or a partial (not full) DMA transfer would reach @@ -246,6 +262,9 @@ void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val) * aio operation with preadv/pwritev. */ if (bm->bus->dma->aiocb) { +#ifdef DEBUG_IDE + printf("%s: draining all remaining requests", __func__); +#endif blk_drain_all(); assert(bm->bus->dma->aiocb == NULL); } From 02506b20b6609ed4ecb09de9900ba9f1dd20b205 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 17 Nov 2015 15:06:33 -0500 Subject: [PATCH 5/6] ide: enable buffered requests for ATAPI devices Signed-off-by: Peter Lieven Reviewed-by: Fam Zheng Message-id: 1447345846-15624-6-git-send-email-pl@kamp.de Signed-off-by: John Snow --- hw/ide/atapi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index eb0d964215..7b9f74c3b5 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -189,8 +189,8 @@ static int cd_read_sector(IDEState *s) block_acct_start(blk_get_stats(s->blk), &s->acct, 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); - blk_aio_readv(s->blk, (int64_t)s->lba << 2, &s->qiov, 4, - cd_read_sector_cb, s); + ide_buffered_readv(s, (int64_t)s->lba << 2, &s->qiov, 4, + cd_read_sector_cb, s); s->status |= BUSY_STAT; return 0; @@ -432,9 +432,9 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret) s->bus->dma->iov.iov_len = n * 4 * 512; qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1); - s->bus->dma->aiocb = blk_aio_readv(s->blk, (int64_t)s->lba << 2, - &s->bus->dma->qiov, n * 4, - ide_atapi_cmd_read_dma_cb, s); + s->bus->dma->aiocb = ide_buffered_readv(s, (int64_t)s->lba << 2, + &s->bus->dma->qiov, n * 4, + ide_atapi_cmd_read_dma_cb, s); return; eot: From d66a8fa83b00b3b3d631a0e28cdce8c9b5698822 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 17 Nov 2015 15:06:39 -0500 Subject: [PATCH 6/6] ide: enable buffered requests for PIO read requests Signed-off-by: Peter Lieven Reviewed-by: Fam Zheng Message-id: 1447345846-15624-7-git-send-email-pl@kamp.de Signed-off-by: John Snow --- hw/ide/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 1470737e34..da3baab1eb 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -679,8 +679,8 @@ static void ide_sector_read(IDEState *s) block_acct_start(blk_get_stats(s->blk), &s->acct, n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); - s->pio_aiocb = blk_aio_readv(s->blk, sector_num, &s->qiov, n, - ide_sector_read_cb, s); + s->pio_aiocb = ide_buffered_readv(s, sector_num, &s->qiov, n, + ide_sector_read_cb, s); } void dma_buf_commit(IDEState *s, uint32_t tx_bytes)