From 7bf4162a801761792834e9e7da4b80b089434ce7 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 10 Jun 2011 22:05:30 +0200 Subject: [PATCH 01/14] block/rbd: Remove unused local variable Variable 'snap' is assigned a value that is never used. Remove snap and the related code. Cc: Christian Brunner Cc: Josh Durgin Cc: Kevin Wolf Signed-off-by: Stefan Weil Reviewed-by: Josh Durgin Signed-off-by: Kevin Wolf --- block/rbd.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index bdc448aa9f..d5659cdf19 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -227,7 +227,6 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options) char name[RBD_MAX_IMAGE_NAME_SIZE]; char snap_buf[RBD_MAX_SNAP_NAME_SIZE]; char conf[RBD_MAX_CONF_SIZE]; - char *snap = NULL; rados_t cluster; rados_ioctx_t io_ctx; int ret; @@ -238,9 +237,6 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options) conf, sizeof(conf)) < 0) { return -EINVAL; } - if (snap_buf[0] != '\0') { - snap = snap_buf; - } /* Read out options */ while (options && options->name) { From 42496d6240bfedc7ac6d92f04f92cff6c2e9f226 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Jun 2011 15:04:32 +0200 Subject: [PATCH 02/14] qcow2: Avoid direct AIO callback bdrv_aio_* must not call the callback before returning to its caller. In qcow2, this could happen in some error cases. This starts the real requests processing in a BH to avoid this situation. Signed-off-by: Kevin Wolf --- block/qcow2.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 8451ded9a3..2c51e7ccbd 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -378,6 +378,7 @@ typedef struct QCowAIOCB { uint64_t bytes_done; uint64_t cluster_offset; uint8_t *cluster_data; + bool is_write; BlockDriverAIOCB *hd_aiocb; QEMUIOVector hd_qiov; QEMUBH *bh; @@ -399,12 +400,19 @@ static AIOPool qcow2_aio_pool = { }; static void qcow2_aio_read_cb(void *opaque, int ret); -static void qcow2_aio_read_bh(void *opaque) +static void qcow2_aio_write_cb(void *opaque, int ret); + +static void qcow2_aio_rw_bh(void *opaque) { QCowAIOCB *acb = opaque; qemu_bh_delete(acb->bh); acb->bh = NULL; - qcow2_aio_read_cb(opaque, 0); + + if (acb->is_write) { + qcow2_aio_write_cb(opaque, 0); + } else { + qcow2_aio_read_cb(opaque, 0); + } } static int qcow2_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb) @@ -493,14 +501,14 @@ static void qcow2_aio_read_cb(void *opaque, int ret) goto done; } } else { - ret = qcow2_schedule_bh(qcow2_aio_read_bh, acb); + ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb); if (ret < 0) goto done; } } else { /* Note: in this case, no need to wait */ qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors); - ret = qcow2_schedule_bh(qcow2_aio_read_bh, acb); + ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb); if (ret < 0) goto done; } @@ -515,7 +523,7 @@ static void qcow2_aio_read_cb(void *opaque, int ret) s->cluster_cache + index_in_cluster * 512, 512 * acb->cur_nr_sectors); - ret = qcow2_schedule_bh(qcow2_aio_read_bh, acb); + ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb); if (ret < 0) goto done; } else { @@ -572,6 +580,7 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num, acb->hd_aiocb = NULL; acb->sector_num = sector_num; acb->qiov = qiov; + acb->is_write = is_write; qemu_iovec_init(&acb->hd_qiov, qiov->niov); @@ -591,17 +600,22 @@ static BlockDriverAIOCB *qcow2_aio_readv(BlockDriverState *bs, void *opaque) { QCowAIOCB *acb; + int ret; acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); if (!acb) return NULL; - qcow2_aio_read_cb(acb, 0); + ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb); + if (ret < 0) { + qemu_iovec_destroy(&acb->hd_qiov); + qemu_aio_release(acb); + return NULL; + } + return &acb->common; } -static void qcow2_aio_write_cb(void *opaque, int ret); - static void run_dependent_requests(QCowL2Meta *m) { QCowAIOCB *req; @@ -724,6 +738,7 @@ static BlockDriverAIOCB *qcow2_aio_writev(BlockDriverState *bs, { BDRVQcowState *s = bs->opaque; QCowAIOCB *acb; + int ret; s->cluster_cache_offset = -1; /* disable compressed cache */ @@ -731,7 +746,13 @@ static BlockDriverAIOCB *qcow2_aio_writev(BlockDriverState *bs, if (!acb) return NULL; - qcow2_aio_write_cb(acb, 0); + ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb); + if (ret < 0) { + qemu_iovec_destroy(&acb->hd_qiov); + qemu_aio_release(acb); + return NULL; + } + return &acb->common; } From b11a24dee661dd1e1de0dcbc149052ed67b0647a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Jun 2011 15:20:44 +0200 Subject: [PATCH 03/14] qcow: Avoid direct AIO callback bdrv_aio_* must not call the callback before returning to its caller. In qcow, this could happen in some error cases. This starts the real requests processing in a BH to avoid this situation. Signed-off-by: Kevin Wolf --- block/qcow.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/block/qcow.c b/block/qcow.c index a26c88620f..227b104e36 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -496,6 +496,8 @@ typedef struct QCowAIOCB { uint64_t cluster_offset; uint8_t *cluster_data; struct iovec hd_iov; + bool is_write; + QEMUBH *bh; QEMUIOVector hd_qiov; BlockDriverAIOCB *hd_aiocb; } QCowAIOCB; @@ -525,6 +527,8 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, acb->hd_aiocb = NULL; acb->sector_num = sector_num; acb->qiov = qiov; + acb->is_write = is_write; + if (qiov->niov > 1) { acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); if (is_write) @@ -538,6 +542,38 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, return acb; } +static void qcow_aio_read_cb(void *opaque, int ret); +static void qcow_aio_write_cb(void *opaque, int ret); + +static void qcow_aio_rw_bh(void *opaque) +{ + QCowAIOCB *acb = opaque; + qemu_bh_delete(acb->bh); + acb->bh = NULL; + + if (acb->is_write) { + qcow_aio_write_cb(opaque, 0); + } else { + qcow_aio_read_cb(opaque, 0); + } +} + +static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb) +{ + if (acb->bh) { + return -EIO; + } + + acb->bh = qemu_bh_new(cb, acb); + if (!acb->bh) { + return -EIO; + } + + qemu_bh_schedule(acb->bh); + + return 0; +} + static void qcow_aio_read_cb(void *opaque, int ret) { QCowAIOCB *acb = opaque; @@ -640,12 +676,21 @@ static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { QCowAIOCB *acb; + int ret; acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); if (!acb) return NULL; - qcow_aio_read_cb(acb, 0); + ret = qcow_schedule_bh(qcow_aio_rw_bh, acb); + if (ret < 0) { + if (acb->qiov->niov > 1) { + qemu_vfree(acb->orig_buf); + } + qemu_aio_release(acb); + return NULL; + } + return &acb->common; } @@ -725,6 +770,7 @@ static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, { BDRVQcowState *s = bs->opaque; QCowAIOCB *acb; + int ret; s->cluster_cache_offset = -1; /* disable compressed cache */ @@ -733,7 +779,15 @@ static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, return NULL; - qcow_aio_write_cb(acb, 0); + ret = qcow_schedule_bh(qcow_aio_rw_bh, acb); + if (ret < 0) { + if (acb->qiov->niov > 1) { + qemu_vfree(acb->orig_buf); + } + qemu_aio_release(acb); + return NULL; + } + return &acb->common; } From e67a64a869312eccc1487409aaa03177da4d2f26 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Jun 2011 16:12:58 +0200 Subject: [PATCH 04/14] vdi: Avoid direct AIO callback bdrv_aio_* must not call the callback before returning to its caller. In vdi, this could happen in some error cases. This starts the real requests processing in a BH to avoid this situation. Signed-off-by: Kevin Wolf --- block/vdi.c | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/block/vdi.c b/block/vdi.c index 4c9e201c33..261cf9b98d 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -152,6 +152,7 @@ typedef struct { /* Buffer for new allocated block. */ void *block_buffer; void *orig_buf; + bool is_write; int header_modified; BlockDriverAIOCB *hd_aiocb; struct iovec hd_iov; @@ -504,6 +505,8 @@ static VdiAIOCB *vdi_aio_setup(BlockDriverState *bs, int64_t sector_num, acb->hd_aiocb = NULL; acb->sector_num = sector_num; acb->qiov = qiov; + acb->is_write = is_write; + if (qiov->niov > 1) { acb->buf = qemu_blockalign(bs, qiov->size); acb->orig_buf = acb->buf; @@ -542,14 +545,20 @@ static int vdi_schedule_bh(QEMUBHFunc *cb, VdiAIOCB *acb) } static void vdi_aio_read_cb(void *opaque, int ret); +static void vdi_aio_write_cb(void *opaque, int ret); -static void vdi_aio_read_bh(void *opaque) +static void vdi_aio_rw_bh(void *opaque) { VdiAIOCB *acb = opaque; logout("\n"); qemu_bh_delete(acb->bh); acb->bh = NULL; - vdi_aio_read_cb(opaque, 0); + + if (acb->is_write) { + vdi_aio_write_cb(opaque, 0); + } else { + vdi_aio_read_cb(opaque, 0); + } } static void vdi_aio_read_cb(void *opaque, int ret) @@ -597,7 +606,7 @@ static void vdi_aio_read_cb(void *opaque, int ret) if (bmap_entry == VDI_UNALLOCATED) { /* Block not allocated, return zeros, no need to wait. */ memset(acb->buf, 0, n_sectors * SECTOR_SIZE); - ret = vdi_schedule_bh(vdi_aio_read_bh, acb); + ret = vdi_schedule_bh(vdi_aio_rw_bh, acb); if (ret < 0) { goto done; } @@ -630,12 +639,23 @@ static BlockDriverAIOCB *vdi_aio_readv(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { VdiAIOCB *acb; + int ret; + logout("\n"); acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); if (!acb) { return NULL; } - vdi_aio_read_cb(acb, 0); + + ret = vdi_schedule_bh(vdi_aio_rw_bh, acb); + if (ret < 0) { + if (acb->qiov->niov > 1) { + qemu_vfree(acb->orig_buf); + } + qemu_aio_release(acb); + return NULL; + } + return &acb->common; } @@ -789,12 +809,23 @@ static BlockDriverAIOCB *vdi_aio_writev(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { VdiAIOCB *acb; + int ret; + logout("\n"); acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); if (!acb) { return NULL; } - vdi_aio_write_cb(acb, 0); + + ret = vdi_schedule_bh(vdi_aio_rw_bh, acb); + if (ret < 0) { + if (acb->qiov->niov > 1) { + qemu_vfree(acb->orig_buf); + } + qemu_aio_release(acb); + return NULL; + } + return &acb->common; } From 39aa9a12ccbfd1ca2253fd1dc584bfb13670be47 Mon Sep 17 00:00:00 2001 From: Devin Nakamura Date: Thu, 9 Jun 2011 01:06:43 -0400 Subject: [PATCH 05/14] Replaced tabs with spaces in block.h and block_int.h Signed-off-by: Devin Nakamura Signed-off-by: Kevin Wolf --- block.h | 6 +++--- block_int.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/block.h b/block.h index da7d39cd1e..859d1d9835 100644 --- a/block.h +++ b/block.h @@ -110,7 +110,7 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res); typedef struct BlockDriverAIOCB BlockDriverAIOCB; typedef void BlockDriverCompletionFunc(void *opaque, int ret); typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, - int sector_num); + int sector_num); BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); @@ -118,7 +118,7 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque); + BlockDriverCompletionFunc *cb, void *opaque); void bdrv_aio_cancel(BlockDriverAIOCB *acb); typedef struct BlockRequest { @@ -150,7 +150,7 @@ void bdrv_close_all(void); int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_has_zero_init(BlockDriverState *bs); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - int *pnum); + int *pnum); #define BIOS_ATA_TRANSLATION_AUTO 0 #define BIOS_ATA_TRANSLATION_NONE 1 diff --git a/block_int.h b/block_int.h index fa913371e1..1e265d274d 100644 --- a/block_int.h +++ b/block_int.h @@ -203,8 +203,8 @@ struct BlockDriverState { void *private; }; -#define CHANGE_MEDIA 0x01 -#define CHANGE_SIZE 0x02 +#define CHANGE_MEDIA 0x01 +#define CHANGE_SIZE 0x02 struct BlockDriverAIOCB { AIOPool *pool; From 9e2a3701a1fcfec0316b9dc1a6cd62869de5542c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 24 May 2011 16:40:02 +0200 Subject: [PATCH 06/14] qcow2: Fix in-flight list after qcow2_cache_put failure If qcow2_cache_put returns an error during cluster allocation and the allocation fails, it must be removed from the list of in-flight allocations. Otherwise we'd get a loop in the list when the ACB is used for the next allocation. Luckily, this qcow2_cache_put shouldn't fail anyway because the L2 table is only read, so that qcow2_cache_put doesn't even involve I/O. Signed-off-by: Kevin Wolf Reviewed-by: Christoph Hellwig --- block/qcow2-cluster.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index c9e7bbd9d6..882f50a80b 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -796,8 +796,8 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, m->depends_on = old_alloc; m->nb_clusters = 0; *num = 0; - ret = 0; - goto fail; + + goto out_wait_dependency; } } } @@ -812,7 +812,6 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size); if (cluster_offset < 0) { - QLIST_REMOVE(m, next_in_flight); ret = cluster_offset; goto fail; } @@ -825,7 +824,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, out: ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); if (ret < 0) { - return ret; + goto fail_put; } m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); @@ -835,8 +834,13 @@ out: return 0; +out_wait_dependency: + return qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + fail: qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); +fail_put: + QLIST_REMOVE(m, next_in_flight); return ret; } From def93791f22a3536c3508244ec7d270098484c7d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 26 May 2011 15:00:34 +0200 Subject: [PATCH 07/14] ide: Split error status from status register When adding the werror=stop mode, some flags were added to s->status which are used to determine what kind of operation should be restarted when the VM is continued. Unfortunately, it turns out that s->status is in fact a device register and as such is visible to the guest (some of the abused bits are even writable for the guest). For migration we keep on using the old VMState field (renamed to migration_compat_status) if the status register doesn't use any of the previously abused bits. If it does, we use a subsection with a clean copy of the status register. The error status is always sent in a subsection if there is any error. It can't use the old field because errors happen even without PCI. Signed-off-by: Kevin Wolf --- hw/ide/core.c | 28 +++++++++++++++++- hw/ide/internal.h | 8 ++++++ hw/ide/pci.c | 73 ++++++++++++++++++++++++++++++++++++++++++----- hw/ide/pci.h | 4 +++ 4 files changed, 105 insertions(+), 8 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 95beb175b3..da250ac39b 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -446,7 +446,7 @@ static int ide_handle_rw_error(IDEState *s, int error, int op) if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) || action == BLOCK_ERR_STOP_ANY) { s->bus->dma->ops->set_unit(s->bus->dma, s->unit); - s->bus->dma->ops->add_status(s->bus->dma, op); + s->bus->error_status = op; bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read); vm_stop(VMSTOP_DISKFULL); } else { @@ -1847,6 +1847,13 @@ static bool ide_atapi_gesn_needed(void *opaque) return s->events.new_media || s->events.eject_request; } +static bool ide_error_needed(void *opaque) +{ + IDEBus *bus = opaque; + + return (bus->error_status != 0); +} + /* Fields for GET_EVENT_STATUS_NOTIFICATION ATAPI command */ const VMStateDescription vmstate_ide_atapi_gesn_state = { .name ="ide_drive/atapi/gesn_state", @@ -1921,6 +1928,17 @@ const VMStateDescription vmstate_ide_drive = { } }; +const VMStateDescription vmstate_ide_error_status = { + .name ="ide_bus/error", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField []) { + VMSTATE_INT32(error_status, IDEBus), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ide_bus = { .name = "ide_bus", .version_id = 1, @@ -1930,6 +1948,14 @@ const VMStateDescription vmstate_ide_bus = { VMSTATE_UINT8(cmd, IDEBus), VMSTATE_UINT8(unit, IDEBus), VMSTATE_END_OF_LIST() + }, + .subsections = (VMStateSubsection []) { + { + .vmsd = &vmstate_ide_error_status, + .needed = ide_error_needed, + }, { + /* empty */ + } } }; diff --git a/hw/ide/internal.h b/hw/ide/internal.h index c2b35ec5e6..8d18cc3733 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -486,6 +486,8 @@ struct IDEBus { uint8_t unit; uint8_t cmd; qemu_irq irq; + + int error_status; }; struct IDEDevice { @@ -505,11 +507,17 @@ struct IDEDeviceInfo { #define BM_STATUS_DMAING 0x01 #define BM_STATUS_ERROR 0x02 #define BM_STATUS_INT 0x04 + +/* FIXME These are not status register bits */ #define BM_STATUS_DMA_RETRY 0x08 #define BM_STATUS_PIO_RETRY 0x10 #define BM_STATUS_RETRY_READ 0x20 #define BM_STATUS_RETRY_FLUSH 0x40 +#define BM_MIGRATION_COMPAT_STATUS_BITS \ + (BM_STATUS_DMA_RETRY | BM_STATUS_PIO_RETRY | \ + BM_STATUS_RETRY_READ | BM_STATUS_RETRY_FLUSH) + #define BM_CMD_START 0x01 #define BM_CMD_READ 0x08 diff --git a/hw/ide/pci.c b/hw/ide/pci.c index a4726adbea..7fa32bdff1 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -183,27 +183,33 @@ static void bmdma_restart_dma(BMDMAState *bm, int is_read) bmdma_start_dma(&bm->dma, s, bm->dma_cb); } +/* TODO This should be common IDE code */ static void bmdma_restart_bh(void *opaque) { BMDMAState *bm = opaque; + IDEBus *bus = bm->bus; int is_read; qemu_bh_delete(bm->bh); bm->bh = NULL; - is_read = !!(bm->status & BM_STATUS_RETRY_READ); + if (bm->unit == (uint8_t) -1) { + return; + } - if (bm->status & BM_STATUS_DMA_RETRY) { - bm->status &= ~(BM_STATUS_DMA_RETRY | BM_STATUS_RETRY_READ); + is_read = !!(bus->error_status & BM_STATUS_RETRY_READ); + + if (bus->error_status & BM_STATUS_DMA_RETRY) { + bus->error_status &= ~(BM_STATUS_DMA_RETRY | BM_STATUS_RETRY_READ); bmdma_restart_dma(bm, is_read); - } else if (bm->status & BM_STATUS_PIO_RETRY) { - bm->status &= ~(BM_STATUS_PIO_RETRY | BM_STATUS_RETRY_READ); + } else if (bus->error_status & BM_STATUS_PIO_RETRY) { + bus->error_status &= ~(BM_STATUS_PIO_RETRY | BM_STATUS_RETRY_READ); if (is_read) { ide_sector_read(bmdma_active_if(bm)); } else { ide_sector_write(bmdma_active_if(bm)); } - } else if (bm->status & BM_STATUS_RETRY_FLUSH) { + } else if (bus->error_status & BM_STATUS_RETRY_FLUSH) { ide_flush_cache(bmdma_active_if(bm)); } } @@ -351,6 +357,43 @@ static bool ide_bmdma_current_needed(void *opaque) return (bm->cur_prd_len != 0); } +static bool ide_bmdma_status_needed(void *opaque) +{ + BMDMAState *bm = opaque; + + /* Older versions abused some bits in the status register for internal + * error state. If any of these bits are set, we must add a subsection to + * transfer the real status register */ + uint8_t abused_bits = BM_MIGRATION_COMPAT_STATUS_BITS; + + return ((bm->status & abused_bits) != 0); +} + +static void ide_bmdma_pre_save(void *opaque) +{ + BMDMAState *bm = opaque; + uint8_t abused_bits = BM_MIGRATION_COMPAT_STATUS_BITS; + + bm->migration_compat_status = + (bm->status & ~abused_bits) | (bm->bus->error_status & abused_bits); +} + +/* This function accesses bm->bus->error_status which is loaded only after + * BMDMA itself. This is why the function is called from ide_pci_post_load + * instead of being registered with VMState where it would run too early. */ +static int ide_bmdma_post_load(void *opaque, int version_id) +{ + BMDMAState *bm = opaque; + uint8_t abused_bits = BM_MIGRATION_COMPAT_STATUS_BITS; + + if (bm->status == 0) { + bm->status = bm->migration_compat_status & ~abused_bits; + bm->bus->error_status |= bm->migration_compat_status & abused_bits; + } + + return 0; +} + static const VMStateDescription vmstate_bmdma_current = { .name = "ide bmdma_current", .version_id = 1, @@ -365,15 +408,26 @@ static const VMStateDescription vmstate_bmdma_current = { } }; +const VMStateDescription vmstate_bmdma_status = { + .name ="ide bmdma/status", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField []) { + VMSTATE_UINT8(status, BMDMAState), + VMSTATE_END_OF_LIST() + } +}; static const VMStateDescription vmstate_bmdma = { .name = "ide bmdma", .version_id = 3, .minimum_version_id = 0, .minimum_version_id_old = 0, + .pre_save = ide_bmdma_pre_save, .fields = (VMStateField []) { VMSTATE_UINT8(cmd, BMDMAState), - VMSTATE_UINT8(status, BMDMAState), + VMSTATE_UINT8(migration_compat_status, BMDMAState), VMSTATE_UINT32(addr, BMDMAState), VMSTATE_INT64(sector_num, BMDMAState), VMSTATE_UINT32(nsector, BMDMAState), @@ -384,6 +438,9 @@ static const VMStateDescription vmstate_bmdma = { { .vmsd = &vmstate_bmdma_current, .needed = ide_bmdma_current_needed, + }, { + .vmsd = &vmstate_bmdma_status, + .needed = ide_bmdma_status_needed, }, { /* empty */ } @@ -399,7 +456,9 @@ static int ide_pci_post_load(void *opaque, int version_id) /* current versions always store 0/1, but older version stored bigger values. We only need last bit */ d->bmdma[i].unit &= 1; + ide_bmdma_post_load(&d->bmdma[i], -1); } + return 0; } diff --git a/hw/ide/pci.h b/hw/ide/pci.h index cd72cbaeb9..b4f3691a5c 100644 --- a/hw/ide/pci.h +++ b/hw/ide/pci.h @@ -22,6 +22,10 @@ typedef struct BMDMAState { IORange addr_ioport; QEMUBH *bh; qemu_irq irq; + + /* Bit 0-2 and 7: BM status register + * Bit 3-6: bus->error_status */ + uint8_t migration_compat_status; } BMDMAState; typedef struct PCIIDEState { From fdc650d76d785ba56b1e87c260ce96d8e22c7cc8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Jun 2011 14:12:57 +0200 Subject: [PATCH 08/14] ide: Fix ide_drive_pio_state_needed() When a failed PIO request caused the VM to stop, we still need to transfer the PIO state even though DRQ=0 at this point. Signed-off-by: Kevin Wolf --- hw/ide/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index da250ac39b..e5def8b3b1 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1837,7 +1837,8 @@ static bool ide_drive_pio_state_needed(void *opaque) { IDEState *s = opaque; - return (s->status & DRQ_STAT) != 0; + return ((s->status & DRQ_STAT) != 0) + || (s->bus->error_status & BM_STATUS_PIO_RETRY); } static bool ide_atapi_gesn_needed(void *opaque) From 0754f9ecef98ad95442f16657df9721dd5682c3a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Jun 2011 14:15:18 +0200 Subject: [PATCH 09/14] ide: Add forgotten VMSTATE_END_OF_LIST in subsection Signed-off-by: Kevin Wolf --- hw/ide/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/ide/core.c b/hw/ide/core.c index e5def8b3b1..399b74c9d6 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1864,6 +1864,7 @@ const VMStateDescription vmstate_ide_atapi_gesn_state = { .fields = (VMStateField []) { VMSTATE_BOOL(events.new_media, IDEState), VMSTATE_BOOL(events.eject_request, IDEState), + VMSTATE_END_OF_LIST() } }; From cb144ccb2ffdb3bca4026455074e434d8db34d73 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 19 May 2011 10:57:59 +0200 Subject: [PATCH 10/14] make dma_bdrv_io available to drivers Make dma_bdrv_io available for drivers, and pass an explicit I/O function instead of hardcoding bdrv_aio_readv/bdrv_aio_writev. This is required to implement non-READ/WRITE dma commands in the ide driver, e.g. the upcoming TRIM support. Signed-off-by: Christoph Hellwig Signed-off-by: Kevin Wolf --- dma-helpers.c | 23 ++++++++++------------- dma.h | 8 ++++++++ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/dma-helpers.c b/dma-helpers.c index 712ed897f3..ba7f897d42 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -47,6 +47,7 @@ typedef struct { target_phys_addr_t sg_cur_byte; QEMUIOVector iov; QEMUBH *bh; + DMAIOFunc *io_func; } DMAAIOCB; static void dma_bdrv_cb(void *opaque, int ret); @@ -116,13 +117,8 @@ static void dma_bdrv_cb(void *opaque, int ret) return; } - if (dbs->is_write) { - dbs->acb = bdrv_aio_writev(dbs->bs, dbs->sector_num, &dbs->iov, - dbs->iov.size / 512, dma_bdrv_cb, dbs); - } else { - dbs->acb = bdrv_aio_readv(dbs->bs, dbs->sector_num, &dbs->iov, - dbs->iov.size / 512, dma_bdrv_cb, dbs); - } + dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov, + dbs->iov.size / 512, dma_bdrv_cb, dbs); if (!dbs->acb) { dma_bdrv_unmap(dbs); qemu_iovec_destroy(&dbs->iov); @@ -144,12 +140,12 @@ static AIOPool dma_aio_pool = { .cancel = dma_aio_cancel, }; -static BlockDriverAIOCB *dma_bdrv_io( +BlockDriverAIOCB *dma_bdrv_io( BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num, - BlockDriverCompletionFunc *cb, void *opaque, - int is_write) + DMAIOFunc *io_func, BlockDriverCompletionFunc *cb, + void *opaque, int is_write) { - DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque); + DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque); dbs->acb = NULL; dbs->bs = bs; @@ -158,6 +154,7 @@ static BlockDriverAIOCB *dma_bdrv_io( dbs->sg_cur_index = 0; dbs->sg_cur_byte = 0; dbs->is_write = is_write; + dbs->io_func = io_func; dbs->bh = NULL; qemu_iovec_init(&dbs->iov, sg->nsg); dma_bdrv_cb(dbs, 0); @@ -173,12 +170,12 @@ BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_bdrv_io(bs, sg, sector, cb, opaque, 0); + return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, 0); } BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_bdrv_io(bs, sg, sector, cb, opaque, 1); + return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, 1); } diff --git a/dma.h b/dma.h index f3bb275159..3d8324bb54 100644 --- a/dma.h +++ b/dma.h @@ -32,6 +32,14 @@ void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base, target_phys_addr_t len); void qemu_sglist_destroy(QEMUSGList *qsg); +typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); + +BlockDriverAIOCB *dma_bdrv_io(BlockDriverState *bs, + QEMUSGList *sg, uint64_t sector_num, + DMAIOFunc *io_func, BlockDriverCompletionFunc *cb, + void *opaque, int is_write); BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector, BlockDriverCompletionFunc *cb, void *opaque); From 4e1e00515e2522bbae98a0653ea2692ec20851ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 19 May 2011 10:58:09 +0200 Subject: [PATCH 11/14] ide: allow other dma comands than read and write Replace the is_read flag with a dma_cmd flag to allow the dma and restart logic to handler other commands like TRIM. Signed-off-by: Christoph Hellwig Signed-off-by: Kevin Wolf --- hw/ide/core.c | 25 ++++++++++++++----------- hw/ide/internal.h | 10 +++++++++- hw/ide/macio.c | 9 +++++++-- hw/ide/pci.c | 6 +++--- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 399b74c9d6..14bda82fd5 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -472,7 +472,7 @@ handle_rw_error: if (ret < 0) { int op = BM_STATUS_DMA_RETRY; - if (s->is_read) + if (s->dma_cmd == IDE_DMA_READ) op |= BM_STATUS_RETRY_READ; if (ide_handle_rw_error(s, -ret, op)) { return; @@ -482,7 +482,7 @@ handle_rw_error: n = s->io_buffer_size >> 9; sector_num = ide_get_sector(s); if (n > 0) { - dma_buf_commit(s, s->is_read); + dma_buf_commit(s, ide_cmd_is_read(s)); sector_num += n; ide_set_sector(s, sector_num); s->nsector -= n; @@ -499,23 +499,26 @@ handle_rw_error: n = s->nsector; s->io_buffer_index = 0; s->io_buffer_size = n * 512; - if (s->bus->dma->ops->prepare_buf(s->bus->dma, s->is_read) == 0) { + if (s->bus->dma->ops->prepare_buf(s->bus->dma, ide_cmd_is_read(s)) == 0) { /* The PRDs were too short. Reset the Active bit, but don't raise an * interrupt. */ goto eot; } #ifdef DEBUG_AIO - printf("ide_dma_cb: sector_num=%" PRId64 " n=%d, is_read=%d\n", - sector_num, n, s->is_read); + printf("ide_dma_cb: sector_num=%" PRId64 " n=%d, cmd_cmd=%d\n", + sector_num, n, s->dma_cmd); #endif - if (s->is_read) { + switch (s->dma_cmd) { + case IDE_DMA_READ: s->bus->dma->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num, ide_dma_cb, s); - } else { + break; + case IDE_DMA_WRITE: s->bus->dma->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num, ide_dma_cb, s); + break; } if (!s->bus->dma->aiocb) { @@ -528,12 +531,12 @@ eot: ide_set_inactive(s); } -static void ide_sector_start_dma(IDEState *s, int is_read) +static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd) { s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT; s->io_buffer_index = 0; s->io_buffer_size = 0; - s->is_read = is_read; + s->dma_cmd = dma_cmd; s->bus->dma->ops->start_dma(s->bus->dma, s, ide_dma_cb); } @@ -916,7 +919,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) if (!s->bs) goto abort_cmd; ide_cmd_lba48_transform(s, lba48); - ide_sector_start_dma(s, 1); + ide_sector_start_dma(s, IDE_DMA_READ); break; case WIN_WRITEDMA_EXT: lba48 = 1; @@ -925,7 +928,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) if (!s->bs) goto abort_cmd; ide_cmd_lba48_transform(s, lba48); - ide_sector_start_dma(s, 0); + ide_sector_start_dma(s, IDE_DMA_WRITE); s->media_changed = 1; break; case WIN_READ_NATIVE_MAX_EXT: diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 8d18cc3733..ea3edf5c33 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -379,6 +379,14 @@ struct unreported_events { bool new_media; }; +enum ide_dma_cmd { + IDE_DMA_READ, + IDE_DMA_WRITE, +}; + +#define ide_cmd_is_read(s) \ + ((s)->dma_cmd == IDE_DMA_READ) + /* NOTE: IDEState represents in fact one drive */ struct IDEState { IDEBus *bus; @@ -446,7 +454,7 @@ struct IDEState { uint32_t mdata_size; uint8_t *mdata_storage; int media_changed; - int is_read; + enum ide_dma_cmd dma_cmd; /* SMART */ uint8_t smart_enabled; uint8_t smart_autosave; diff --git a/hw/ide/macio.c b/hw/ide/macio.c index 7107f6b3c2..099b8cbfe9 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -145,12 +145,17 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) io->addr += io->len; io->len = 0; - if (s->is_read) + switch (s->dma_cmd) { + case IDE_DMA_READ: m->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num, pmac_ide_transfer_cb, io); - else + break; + case IDE_DMA_WRITE: m->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num, pmac_ide_transfer_cb, io); + break; + } + if (!m->aiocb) pmac_ide_transfer_cb(io, -1); } diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 7fa32bdff1..80c579457a 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -169,7 +169,7 @@ static int bmdma_set_inactive(IDEDMA *dma) return 0; } -static void bmdma_restart_dma(BMDMAState *bm, int is_read) +static void bmdma_restart_dma(BMDMAState *bm, enum ide_dma_cmd dma_cmd) { IDEState *s = bmdma_active_if(bm); @@ -177,7 +177,7 @@ static void bmdma_restart_dma(BMDMAState *bm, int is_read) s->io_buffer_index = 0; s->io_buffer_size = 0; s->nsector = bm->nsector; - s->is_read = is_read; + s->dma_cmd = dma_cmd; bm->cur_addr = bm->addr; bm->dma_cb = ide_dma_cb; bmdma_start_dma(&bm->dma, s, bm->dma_cb); @@ -201,7 +201,7 @@ static void bmdma_restart_bh(void *opaque) if (bus->error_status & BM_STATUS_DMA_RETRY) { bus->error_status &= ~(BM_STATUS_DMA_RETRY | BM_STATUS_RETRY_READ); - bmdma_restart_dma(bm, is_read); + bmdma_restart_dma(bm, is_read ? IDE_DMA_READ : IDE_DMA_WRITE); } else if (bus->error_status & BM_STATUS_PIO_RETRY) { bus->error_status &= ~(BM_STATUS_PIO_RETRY | BM_STATUS_RETRY_READ); if (is_read) { From d353fb72f59cd0e1f67baf773e74719cda761a89 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 19 May 2011 10:58:19 +0200 Subject: [PATCH 12/14] ide: add TRIM support Add support for TRIM sub function of the data set management command, and wire it up to the qemu discard infrastructure. Signed-off-by: Christoph Hellwig Signed-off-by: Kevin Wolf --- hw/ide/core.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++- hw/ide/internal.h | 14 ++++++- hw/ide/macio.c | 4 ++ hw/ide/pci.c | 9 ++++- hw/ide/qdev.c | 5 +++ 5 files changed, 124 insertions(+), 5 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 14bda82fd5..ca17a436c0 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -78,7 +78,7 @@ static void ide_identify(IDEState *s) { uint16_t *p; unsigned int oldsize; - IDEDevice *dev; + IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; if (s->identify_set) { memcpy(s->io_buffer, s->identify_data, sizeof(s->identify_data)); @@ -124,6 +124,9 @@ static void ide_identify(IDEState *s) put_le16(p + 66, 120); put_le16(p + 67, 120); put_le16(p + 68, 120); + if (dev && dev->conf.discard_granularity) { + put_le16(p + 69, (1 << 14)); /* determinate TRIM behavior */ + } if (s->ncq_queues) { put_le16(p + 75, s->ncq_queues - 1); @@ -154,9 +157,12 @@ static void ide_identify(IDEState *s) put_le16(p + 101, s->nb_sectors >> 16); put_le16(p + 102, s->nb_sectors >> 32); put_le16(p + 103, s->nb_sectors >> 48); - dev = s->unit ? s->bus->slave : s->bus->master; + if (dev && dev->conf.physical_block_size) put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf)); + if (dev && dev->conf.discard_granularity) { + put_le16(p + 169, 1); /* TRIM support */ + } memcpy(s->identify_data, p, sizeof(s->identify_data)); s->identify_set = 1; @@ -299,6 +305,74 @@ static void ide_set_signature(IDEState *s) } } +typedef struct TrimAIOCB { + BlockDriverAIOCB common; + QEMUBH *bh; + int ret; +} TrimAIOCB; + +static void trim_aio_cancel(BlockDriverAIOCB *acb) +{ + TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common); + + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_release(iocb); +} + +static AIOPool trim_aio_pool = { + .aiocb_size = sizeof(TrimAIOCB), + .cancel = trim_aio_cancel, +}; + +static void ide_trim_bh_cb(void *opaque) +{ + TrimAIOCB *iocb = opaque; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + + qemu_aio_release(iocb); +} + +BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + TrimAIOCB *iocb; + int i, j, ret; + + iocb = qemu_aio_get(&trim_aio_pool, bs, cb, opaque); + iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); + iocb->ret = 0; + + for (j = 0; j < qiov->niov; j++) { + uint64_t *buffer = qiov->iov[j].iov_base; + + for (i = 0; i < qiov->iov[j].iov_len / 8; i++) { + /* 6-byte LBA + 2-byte range per entry */ + uint64_t entry = le64_to_cpu(buffer[i]); + uint64_t sector = entry & 0x0000ffffffffffffULL; + uint16_t count = entry >> 48; + + if (count == 0) { + break; + } + + ret = bdrv_discard(bs, sector, count); + if (!iocb->ret) { + iocb->ret = ret; + } + } + } + + qemu_bh_schedule(iocb->bh); + + return &iocb->common; +} + static inline void ide_abort_command(IDEState *s) { s->status = READY_STAT | ERR_STAT; @@ -474,6 +548,9 @@ handle_rw_error: if (s->dma_cmd == IDE_DMA_READ) op |= BM_STATUS_RETRY_READ; + else if (s->dma_cmd == IDE_DMA_TRIM) + op |= BM_STATUS_RETRY_TRIM; + if (ide_handle_rw_error(s, -ret, op)) { return; } @@ -519,6 +596,10 @@ handle_rw_error: s->bus->dma->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num, ide_dma_cb, s); break; + case IDE_DMA_TRIM: + s->bus->dma->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num, + ide_issue_trim, ide_dma_cb, s, 1); + break; } if (!s->bus->dma->aiocb) { @@ -818,6 +899,18 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) return; switch(val) { + case WIN_DSM: + switch (s->feature) { + case DSM_TRIM: + if (!s->bs) { + goto abort_cmd; + } + ide_sector_start_dma(s, IDE_DMA_TRIM); + break; + default: + goto abort_cmd; + } + break; case WIN_IDENTIFY: if (s->bs && s->drive_kind != IDE_CD) { if (s->drive_kind != IDE_CFATA) diff --git a/hw/ide/internal.h b/hw/ide/internal.h index ea3edf5c33..02e805f070 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -62,7 +62,11 @@ typedef struct IDEDMAOps IDEDMAOps; */ #define CFA_REQ_EXT_ERROR_CODE 0x03 /* CFA Request Extended Error Code */ /* - * 0x04->0x07 Reserved + * 0x04->0x05 Reserved + */ +#define WIN_DSM 0x06 +/* + * 0x07 Reserved */ #define WIN_SRST 0x08 /* ATAPI soft reset command */ #define WIN_DEVICE_RESET 0x08 @@ -190,6 +194,9 @@ typedef struct IDEDMAOps IDEDMAOps; #define IDE_DMA_BUF_SECTORS 256 +/* feature values for Data Set Management */ +#define DSM_TRIM 0x01 + #if (IDE_DMA_BUF_SECTORS < MAX_MULT_SECTORS) #error "IDE_DMA_BUF_SECTORS must be bigger or equal to MAX_MULT_SECTORS" #endif @@ -382,6 +389,7 @@ struct unreported_events { enum ide_dma_cmd { IDE_DMA_READ, IDE_DMA_WRITE, + IDE_DMA_TRIM, }; #define ide_cmd_is_read(s) \ @@ -521,6 +529,7 @@ struct IDEDeviceInfo { #define BM_STATUS_PIO_RETRY 0x10 #define BM_STATUS_RETRY_READ 0x20 #define BM_STATUS_RETRY_FLUSH 0x40 +#define BM_STATUS_RETRY_TRIM 0x80 #define BM_MIGRATION_COMPAT_STATUS_BITS \ (BM_STATUS_DMA_RETRY | BM_STATUS_PIO_RETRY | \ @@ -591,6 +600,9 @@ void ide_transfer_start(IDEState *s, uint8_t *buf, int size, EndTransferFunc *end_transfer_func); void ide_transfer_stop(IDEState *s); void ide_set_inactive(IDEState *s); +BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); /* hw/ide/atapi.c */ void ide_atapi_cmd(IDEState *s); diff --git a/hw/ide/macio.c b/hw/ide/macio.c index 099b8cbfe9..7daeb31ec3 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -154,6 +154,10 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) m->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num, pmac_ide_transfer_cb, io); break; + case IDE_DMA_TRIM: + m->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num, + ide_issue_trim, pmac_ide_transfer_cb, s, 1); + break; } if (!m->aiocb) diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 80c579457a..c940375064 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -200,8 +200,13 @@ static void bmdma_restart_bh(void *opaque) is_read = !!(bus->error_status & BM_STATUS_RETRY_READ); if (bus->error_status & BM_STATUS_DMA_RETRY) { - bus->error_status &= ~(BM_STATUS_DMA_RETRY | BM_STATUS_RETRY_READ); - bmdma_restart_dma(bm, is_read ? IDE_DMA_READ : IDE_DMA_WRITE); + if (bus->error_status & BM_STATUS_RETRY_TRIM) { + bus->error_status &= ~BM_STATUS_RETRY_TRIM; + bmdma_restart_dma(bm, IDE_DMA_TRIM); + } else { + bus->error_status &= ~(BM_STATUS_DMA_RETRY | BM_STATUS_RETRY_READ); + bmdma_restart_dma(bm, is_read ? IDE_DMA_READ : IDE_DMA_WRITE); + } } else if (bus->error_status & BM_STATUS_PIO_RETRY) { bus->error_status &= ~(BM_STATUS_PIO_RETRY | BM_STATUS_RETRY_READ); if (is_read) { diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index 3f9dc89c6d..d9b8f24bb5 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -125,6 +125,11 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) const char *serial; DriveInfo *dinfo; + if (dev->conf.discard_granularity && dev->conf.discard_granularity != 512) { + error_report("discard_granularity must be 512 for ide"); + return -1; + } + serial = dev->serial; if (!serial) { /* try to fall back to value set with legacy -drive serial=... */ From ee752da74f5d07cf441f8d42455c4241d6051ae5 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 10 Jun 2011 16:32:13 +0200 Subject: [PATCH 13/14] ide: Clear error_status after restarting flush Clearing the error status flag was missing for restarting flushes. Now that the error status is separate from the BM status register, we can simply set it to 0 after restarting the request. This ensures that we never forget to clear a bit. Signed-off-by: Kevin Wolf --- hw/ide/pci.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/hw/ide/pci.c b/hw/ide/pci.c index c940375064..9f3050a15e 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -189,6 +189,7 @@ static void bmdma_restart_bh(void *opaque) BMDMAState *bm = opaque; IDEBus *bus = bm->bus; int is_read; + int error_status; qemu_bh_delete(bm->bh); bm->bh = NULL; @@ -199,22 +200,25 @@ static void bmdma_restart_bh(void *opaque) is_read = !!(bus->error_status & BM_STATUS_RETRY_READ); - if (bus->error_status & BM_STATUS_DMA_RETRY) { - if (bus->error_status & BM_STATUS_RETRY_TRIM) { - bus->error_status &= ~BM_STATUS_RETRY_TRIM; + /* The error status must be cleared before resubmitting the request: The + * request may fail again, and this case can only be distinguished if the + * called function can set a new error status. */ + error_status = bus->error_status; + bus->error_status = 0; + + if (error_status & BM_STATUS_DMA_RETRY) { + if (error_status & BM_STATUS_RETRY_TRIM) { bmdma_restart_dma(bm, IDE_DMA_TRIM); } else { - bus->error_status &= ~(BM_STATUS_DMA_RETRY | BM_STATUS_RETRY_READ); bmdma_restart_dma(bm, is_read ? IDE_DMA_READ : IDE_DMA_WRITE); } - } else if (bus->error_status & BM_STATUS_PIO_RETRY) { - bus->error_status &= ~(BM_STATUS_PIO_RETRY | BM_STATUS_RETRY_READ); + } else if (error_status & BM_STATUS_PIO_RETRY) { if (is_read) { ide_sector_read(bmdma_active_if(bm)); } else { ide_sector_write(bmdma_active_if(bm)); } - } else if (bus->error_status & BM_STATUS_RETRY_FLUSH) { + } else if (error_status & BM_STATUS_RETRY_FLUSH) { ide_flush_cache(bmdma_active_if(bm)); } } From 7887f6201ff3fdc9a142eba14d61c563adb57596 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Jun 2011 17:51:21 +0200 Subject: [PATCH 14/14] Allow nested qemu_bh_poll() after BH deletion Without this, qemu segfaults when a BH handler first deletes its BH and then calls another function which involves a nested qemu_bh_poll() call. This can be reproduced by generating an I/O error (e.g. with blkdebug) on an IDE device and using rerror/werror=stop to stop the VM. When continuing the VM, qemu segfaults. Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- async.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/async.c b/async.c index 57ac3a8180..fd313dffb7 100644 --- a/async.c +++ b/async.c @@ -137,11 +137,12 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) int qemu_bh_poll(void) { - QEMUBH *bh, **bhp; + QEMUBH *bh, **bhp, *next; int ret; ret = 0; - for (bh = async_context->first_bh; bh; bh = bh->next) { + for (bh = async_context->first_bh; bh; bh = next) { + next = bh->next; if (!bh->deleted && bh->scheduled) { bh->scheduled = 0; if (!bh->idle)