From 730a9c53b4e52681fcfe31cf38854cbf91e132c7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 6 Aug 2012 15:49:03 +0300 Subject: [PATCH 01/11] virtio-blk: fix use-after-free while handling scsi commands The scsi passthrough handler falls through after completing a request into the failure path, resulting in a use after free. Reproducible by running a guest with aio=native on a block device. Reported-by: Stefan Priebe Signed-off-by: Avi Kivity Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/virtio-blk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index f21757ed55..552b3b6c6a 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -254,6 +254,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) virtio_blk_req_complete(req, status); g_free(req); + return; #else abort(); #endif From 61f52e06f0a21bab782f98ef3ea789aa6d0aa046 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 3 Aug 2012 15:57:06 -0400 Subject: [PATCH 02/11] ahci: Fix ahci cdrom read corruptions for reads > 128k MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While testing q35, which has its cdrom attached to the ahci controller, I found that the Fedora 17 install would panic on boot. The panic occurs while squashfs is trying to read from the cdrom. The errors are: [ 8.622711] SQUASHFS error: xz_dec_run error, data probably corrupt [ 8.625180] SQUASHFS error: squashfs_read_data failed to read block 0x20be48a I was also able to produce corrupt data reads using an installed piix based qemu machine, using 'dd'. I found that the corruptions were only occuring when then read size was greater than 128k. For example, the following command results in corrupted reads: dd if=/dev/sr0 of=/tmp/blah bs=256k iflag=direct The > 128k size reads exercise a different code path than 128k and below. In ide_atapi_cmd_read_dma_cb() s->io_buffer_size is capped at 128k. Thus, ide_atapi_cmd_read_dma_cb() is called a second time when the read is > 128k. However, ahci_dma_rw_buf() restart the read from offset 0, instead of at 128k. Thus, resulting in a corrupted read. To fix this, I've introduced 'io_buffer_offset' field in IDEState to keep track of the offset. I've also modified ahci_populate_sglist() to take a new 3rd offset argument, so that the sglist is property initialized. I've tested this patch using 'dd' testing, and Fedora 17 now correctly boots and installs on q35 with the cdrom ahci controller. Signed-off-by: Jason Baron Tested-by: Andreas Färber Signed-off-by: Kevin Wolf --- hw/ide/ahci.c | 41 ++++++++++++++++++++++++++++++++++------- hw/ide/internal.h | 1 + 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index efea93f0b4..de580a6eff 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -636,7 +636,7 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis) } } -static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist) +static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset) { AHCICmdHdr *cmd = ad->cur_cmd; uint32_t opts = le32_to_cpu(cmd->opts); @@ -647,6 +647,10 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist) uint8_t *prdt; int i; int r = 0; + int sum = 0; + int off_idx = -1; + int off_pos = -1; + int tbl_entry_size; if (!sglist_alloc_hint) { DPRINTF(ad->port_no, "no sg list given by guest: 0x%08x\n", opts); @@ -669,9 +673,30 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist) /* Get entries in the PRDT, init a qemu sglist accordingly */ if (sglist_alloc_hint > 0) { AHCI_SG *tbl = (AHCI_SG *)prdt; - - qemu_sglist_init(sglist, sglist_alloc_hint, ad->hba->dma); + sum = 0; for (i = 0; i < sglist_alloc_hint; i++) { + /* flags_size is zero-based */ + tbl_entry_size = (le32_to_cpu(tbl[i].flags_size) + 1); + if (offset <= (sum + tbl_entry_size)) { + off_idx = i; + off_pos = offset - sum; + break; + } + sum += tbl_entry_size; + } + if ((off_idx == -1) || (off_pos < 0) || (off_pos > tbl_entry_size)) { + DPRINTF(ad->port_no, "%s: Incorrect offset! " + "off_idx: %d, off_pos: %d\n", + __func__, off_idx, off_pos); + r = -1; + goto out; + } + + qemu_sglist_init(sglist, (sglist_alloc_hint - off_idx), ad->hba->dma); + qemu_sglist_add(sglist, le64_to_cpu(tbl[off_idx].addr + off_pos), + le32_to_cpu(tbl[off_idx].flags_size) + 1 - off_pos); + + for (i = off_idx + 1; i < sglist_alloc_hint; i++) { /* flags_size is zero-based */ qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr), le32_to_cpu(tbl[i].flags_size) + 1); @@ -745,7 +770,7 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis, ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 2, s->dev[port].port.ifs[0].nb_sectors - 1); - ahci_populate_sglist(&s->dev[port], &ncq_tfs->sglist); + ahci_populate_sglist(&s->dev[port], &ncq_tfs->sglist, 0); ncq_tfs->tag = tag; switch(ncq_fis->command) { @@ -970,7 +995,7 @@ static int ahci_start_transfer(IDEDMA *dma) goto out; } - if (!ahci_populate_sglist(ad, &s->sg)) { + if (!ahci_populate_sglist(ad, &s->sg, 0)) { has_sglist = 1; } @@ -1015,6 +1040,7 @@ static void ahci_start_dma(IDEDMA *dma, IDEState *s, DPRINTF(ad->port_no, "\n"); ad->dma_cb = dma_cb; ad->dma_status |= BM_STATUS_DMAING; + s->io_buffer_offset = 0; dma_cb(s, 0); } @@ -1023,7 +1049,7 @@ static int ahci_dma_prepare_buf(IDEDMA *dma, int is_write) AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); IDEState *s = &ad->port.ifs[0]; - ahci_populate_sglist(ad, &s->sg); + ahci_populate_sglist(ad, &s->sg, 0); s->io_buffer_size = s->sg.size; DPRINTF(ad->port_no, "len=%#x\n", s->io_buffer_size); @@ -1037,7 +1063,7 @@ static int ahci_dma_rw_buf(IDEDMA *dma, int is_write) uint8_t *p = s->io_buffer + s->io_buffer_index; int l = s->io_buffer_size - s->io_buffer_index; - if (ahci_populate_sglist(ad, &s->sg)) { + if (ahci_populate_sglist(ad, &s->sg, s->io_buffer_offset)) { return 0; } @@ -1050,6 +1076,7 @@ static int ahci_dma_rw_buf(IDEDMA *dma, int is_write) /* update number of transferred bytes */ ad->cur_cmd->status = cpu_to_le32(le32_to_cpu(ad->cur_cmd->status) + l); s->io_buffer_index += l; + s->io_buffer_offset += l; DPRINTF(ad->port_no, "len=%#x\n", l); diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 7170bd9cd0..bf7d313cf4 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -393,6 +393,7 @@ struct IDEState { struct iovec iov; QEMUIOVector qiov; /* ATA DMA state */ + int io_buffer_offset; int io_buffer_size; QEMUSGList sg; /* PIO transfer handling */ From ea8d82a1ed72634f089ed1bccccd9c84cc1ab855 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 3 Aug 2012 15:57:10 -0400 Subject: [PATCH 03/11] ahci: Fix sglist memleak in ahci_dma_rw_buf() I noticed that in hw/ide/ahci:ahci_dma_rw_buf() we do not free the sglist. Thus, I've added a call to qemu_sglist_destroy() to fix this memory leak. In addition, I've adeed a call in qemu_sglist_destroy() to 0 all of the sglist fields, in case there is some other codepath that tries to free the sglist. Signed-off-by: Jason Baron Signed-off-by: Kevin Wolf --- dma-helpers.c | 1 + hw/ide/ahci.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/dma-helpers.c b/dma-helpers.c index 35cb500581..13593d1b42 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -65,6 +65,7 @@ void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len) void qemu_sglist_destroy(QEMUSGList *qsg) { g_free(qsg->sg); + memset(qsg, 0, sizeof(*qsg)); } typedef struct { diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index de580a6eff..5ea3cadb01 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1073,6 +1073,9 @@ static int ahci_dma_rw_buf(IDEDMA *dma, int is_write) dma_buf_write(p, l, &s->sg); } + /* free sglist that was created in ahci_populate_sglist() */ + qemu_sglist_destroy(&s->sg); + /* update number of transferred bytes */ ad->cur_cmd->status = cpu_to_le32(le32_to_cpu(ad->cur_cmd->status) + l); s->io_buffer_index += l; From 353a41be05f9616f7bd7120456f706b3c85683ea Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Aug 2012 13:17:14 +0200 Subject: [PATCH 04/11] qemu-iotests: Save some sed processes Instead of building a huge pipeline, just pass all expressions to a single sed process. Suggested-by: Eric Blake Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- tests/qemu-iotests/common.rc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 7782808a26..6b805161c8 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -105,16 +105,16 @@ _make_test_img() # XXX(hch): have global image options? $QEMU_IMG create -f $IMGFMT $extra_img_options $TEST_IMG $image_size | \ - sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" | \ - sed -e "s#$TEST_DIR#TEST_DIR#g" | \ - sed -e "s#$IMGFMT#IMGFMT#g" | \ - sed -e "s# encryption=off##g" | \ - sed -e "s# cluster_size=[0-9]\\+##g" | \ - sed -e "s# table_size=[0-9]\\+##g" | \ - sed -e "s# compat='[^']*'##g" | \ - sed -e "s# compat6=\\(on\\|off\\)##g" | \ - sed -e "s# static=\\(on\\|off\\)##g" | \ - sed -e "s# lazy_refcounts=\\(on\\|off\\)##g" + sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ + -e "s#$TEST_DIR#TEST_DIR#g" \ + -e "s#$IMGFMT#IMGFMT#g" \ + -e "s# encryption=off##g" \ + -e "s# cluster_size=[0-9]\\+##g" \ + -e "s# table_size=[0-9]\\+##g" \ + -e "s# compat='[^']*'##g" \ + -e "s# compat6=\\(on\\|off\\)##g" \ + -e "s# static=\\(on\\|off\\)##g" \ + -e "s# lazy_refcounts=\\(on\\|off\\)##g" } _cleanup_test_img() From 13e3dce068773c971ff2f19d986378c55897c4a3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Aug 2012 16:07:19 +0200 Subject: [PATCH 05/11] virtio-blk: support VIRTIO_BLK_F_CONFIG_WCE Also rename VIRTIO_BLK_F_WCACHE to VIRTIO_BLK_F_WCE for consistency with the spec. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- hw/virtio-blk.c | 16 ++++++++++++++-- hw/virtio-blk.h | 4 +++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 552b3b6c6a..97bb4bdc7c 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -510,9 +510,19 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) blkcfg.size_max = 0; blkcfg.physical_block_exp = get_physical_block_exp(s->conf); blkcfg.alignment_offset = 0; + blkcfg.wce = bdrv_enable_write_cache(s->bs); memcpy(config, &blkcfg, sizeof(struct virtio_blk_config)); } +static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) +{ + VirtIOBlock *s = to_virtio_blk(vdev); + struct virtio_blk_config blkcfg; + + memcpy(&blkcfg, config, sizeof(blkcfg)); + bdrv_set_enable_write_cache(s->bs, blkcfg.wce != 0); +} + static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features) { VirtIOBlock *s = to_virtio_blk(vdev); @@ -523,9 +533,10 @@ static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features) features |= (1 << VIRTIO_BLK_F_BLK_SIZE); features |= (1 << VIRTIO_BLK_F_SCSI); + features |= (1 << VIRTIO_BLK_F_CONFIG_WCE); if (bdrv_enable_write_cache(s->bs)) - features |= (1 << VIRTIO_BLK_F_WCACHE); - + features |= (1 << VIRTIO_BLK_F_WCE); + if (bdrv_is_read_only(s->bs)) features |= 1 << VIRTIO_BLK_F_RO; @@ -610,6 +621,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk) sizeof(VirtIOBlock)); s->vdev.get_config = virtio_blk_update_config; + s->vdev.set_config = virtio_blk_set_config; s->vdev.get_features = virtio_blk_get_features; s->vdev.reset = virtio_blk_reset; s->bs = blk->conf.bs; diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h index 79ebccc95b..35834cf493 100644 --- a/hw/virtio-blk.h +++ b/hw/virtio-blk.h @@ -31,8 +31,9 @@ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ #define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ /* #define VIRTIO_BLK_F_IDENTIFY 8 ATA IDENTIFY supported, DEPRECATED */ -#define VIRTIO_BLK_F_WCACHE 9 /* write cache enabled */ +#define VIRTIO_BLK_F_WCE 9 /* write cache enabled */ #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ +#define VIRTIO_BLK_F_CONFIG_WCE 11 /* write cache configurable */ #define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ @@ -49,6 +50,7 @@ struct virtio_blk_config uint8_t alignment_offset; uint16_t min_io_size; uint32_t opt_io_size; + uint8_t wce; } QEMU_PACKED; /* These two define direction. */ From 9315cbfd8d7074eca44fbc5f93902e97b27d5240 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Aug 2012 16:07:20 +0200 Subject: [PATCH 06/11] virtio-blk: disable write cache if not negotiated If the guest does not support flushes, we should run in writethrough mode. The setting is temporary until the next reset, so that for example the BIOS will run in writethrough mode while Linux will run with a writeback cache. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- hw/virtio-blk.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 97bb4bdc7c..fd8fa90792 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -543,6 +543,19 @@ static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features) return features; } +static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) +{ + VirtIOBlock *s = to_virtio_blk(vdev); + uint32_t features; + + if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return; + } + + features = vdev->guest_features; + bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE))); +} + static void virtio_blk_save(QEMUFile *f, void *opaque) { VirtIOBlock *s = opaque; @@ -623,6 +636,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk) s->vdev.get_config = virtio_blk_update_config; s->vdev.set_config = virtio_blk_set_config; s->vdev.get_features = virtio_blk_get_features; + s->vdev.set_status = virtio_blk_set_status; s->vdev.reset = virtio_blk_reset; s->bs = blk->conf.bs; s->conf = &blk->conf; From 1f212b9d3edd8679bafd3bcf0301795206438724 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Aug 2012 16:07:21 +0200 Subject: [PATCH 07/11] blockdev: flip default cache mode from writethrough to writeback Now all major device models (IDE, SCSI, virtio) can choose between writethrough and writeback at run-time, and virtio will even revert to writethrough if the guest is not capable of sending flushes. So we can change the default to writeback at last. Tested, for lack of a better idea, with a breakpoint on bdrv_open and all cache choices one by one. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- blockdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/blockdev.c b/blockdev.c index 8669142704..7c83baa353 100644 --- a/blockdev.c +++ b/blockdev.c @@ -377,6 +377,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) } } + bdrv_flags |= BDRV_O_CACHE_WB; if ((buf = qemu_opt_get(opts, "cache")) != NULL) { if (bdrv_parse_cache_flags(buf, &bdrv_flags) != 0) { error_report("invalid cache option"); From b10170aca0616df85482dcc7ddda03437bc07cca Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 9 Aug 2012 13:05:54 +0100 Subject: [PATCH 08/11] qed: mark image clean after repair succeeds The dirty bit is cleared after image repair succeeds in qed_open(). Move this into qed_check() so that all callers benefit from this behavior when fix=true. This is necessary so qemu-img check can call .bdrv_check() and mark the image clean. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/qed-check.c | 26 ++++++++++++++++++++++++++ block/qed.c | 9 +-------- block/qed.h | 5 +++++ 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/block/qed-check.c b/block/qed-check.c index 5edf60775b..b473dcd61f 100644 --- a/block/qed-check.c +++ b/block/qed-check.c @@ -194,6 +194,28 @@ static void qed_check_for_leaks(QEDCheck *check) } } +/** + * Mark an image clean once it passes check or has been repaired + */ +static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result) +{ + /* Skip if there were unfixable corruptions or I/O errors */ + if (result->corruptions > 0 || result->check_errors > 0) { + return; + } + + /* Skip if image is already marked clean */ + if (!(s->header.features & QED_F_NEED_CHECK)) { + return; + } + + /* Ensure fixes reach storage before clearing check bit */ + bdrv_flush(s->bs); + + s->header.features &= ~QED_F_NEED_CHECK; + qed_write_header_sync(s); +} + int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix) { QEDCheck check = { @@ -215,6 +237,10 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix) if (ret == 0) { /* Only check for leaks if entire image was scanned successfully */ qed_check_for_leaks(&check); + + if (fix) { + qed_check_mark_clean(s, result); + } } g_free(check.used_clusters); diff --git a/block/qed.c b/block/qed.c index 5f3eefa3af..226545d8e7 100644 --- a/block/qed.c +++ b/block/qed.c @@ -89,7 +89,7 @@ static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le) le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size); } -static int qed_write_header_sync(BDRVQEDState *s) +int qed_write_header_sync(BDRVQEDState *s) { QEDHeader le; int ret; @@ -491,13 +491,6 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) if (ret) { goto out; } - if (!result.corruptions && !result.check_errors) { - /* Ensure fixes reach storage before clearing check bit */ - bdrv_flush(s->bs); - - s->header.features &= ~QED_F_NEED_CHECK; - qed_write_header_sync(s); - } } } diff --git a/block/qed.h b/block/qed.h index c716772ad7..a063bf70af 100644 --- a/block/qed.h +++ b/block/qed.h @@ -210,6 +210,11 @@ typedef struct { void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque); void gencb_complete(void *opaque, int ret); +/** + * Header functions + */ +int qed_write_header_sync(BDRVQEDState *s); + /** * L2 cache functions */ From acbe59829e448aa63bdccc6ee484b7e1ac605e25 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 9 Aug 2012 13:05:55 +0100 Subject: [PATCH 09/11] qcow2: mark image clean after repair succeeds The dirty bit is cleared after image repair succeeds in qcow2_open(). Move this into qcow2_check() so that all callers benefit from this behavior when fix mode is enabled. This is necessary so qemu-img check can call .bdrv_check() and mark the image clean. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/qcow2.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index fd5e214431..5896fd6d9c 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -270,6 +270,20 @@ static int qcow2_mark_clean(BlockDriverState *bs) return 0; } +static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result, + BdrvCheckMode fix) +{ + int ret = qcow2_check_refcounts(bs, result, fix); + if (ret < 0) { + return ret; + } + + if (fix && result->check_errors == 0 && result->corruptions == 0) { + return qcow2_mark_clean(bs); + } + return ret; +} + static int qcow2_open(BlockDriverState *bs, int flags) { BDRVQcowState *s = bs->opaque; @@ -474,12 +488,7 @@ static int qcow2_open(BlockDriverState *bs, int flags) !bs->read_only) { BdrvCheckResult result = {0}; - ret = qcow2_check_refcounts(bs, &result, BDRV_FIX_ERRORS); - if (ret < 0) { - goto fail; - } - - ret = qcow2_mark_clean(bs); + ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS); if (ret < 0) { goto fail; } @@ -1568,13 +1577,6 @@ static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) return 0; } - -static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result, - BdrvCheckMode fix) -{ - return qcow2_check_refcounts(bs, result, fix); -} - #if 0 static void dump_refcounts(BlockDriverState *bs) { From 058f8f16db0c1c528b665a6283457f019c8b0926 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 9 Aug 2012 13:05:56 +0100 Subject: [PATCH 10/11] block: add BLOCK_O_CHECK for qemu-img check Image formats with a dirty bit, like qed and qcow2, repair dirty image files upon open with BDRV_O_RDWR. Performing automatic repair when qemu-img check runs is not ideal because the bdrv_open() call repairs the image before the actual bdrv_check() call from qemu-img.c. Fix this "double repair" since it leads to confusing output from qemu-img check. Tell the block driver that this image is being opened just for bdrv_check(). This skips automatic repair and qemu-img.c can invoke it manually with bdrv_check(). Update the golden output for qemu-iotests 039 to reflect the new qemu-img check output. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block.h | 1 + block/qcow2.c | 4 ++-- block/qed.c | 2 +- qemu-img.c | 2 +- tests/qemu-iotests/039.out | 6 ++++++ 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/block.h b/block.h index 650d872f46..2e2be11071 100644 --- a/block.h +++ b/block.h @@ -79,6 +79,7 @@ typedef struct BlockDevOps { #define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ #define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ #define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */ +#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH) diff --git a/block/qcow2.c b/block/qcow2.c index 5896fd6d9c..8f183f1465 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -484,8 +484,8 @@ static int qcow2_open(BlockDriverState *bs, int flags) qemu_co_mutex_init(&s->lock); /* Repair image if dirty */ - if ((s->incompatible_features & QCOW2_INCOMPAT_DIRTY) && - !bs->read_only) { + if (!(flags & BDRV_O_CHECK) && !bs->read_only && + (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { BdrvCheckResult result = {0}; ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS); diff --git a/block/qed.c b/block/qed.c index 226545d8e7..a02dbfd72d 100644 --- a/block/qed.c +++ b/block/qed.c @@ -477,7 +477,7 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) } /* If image was not closed cleanly, check consistency */ - if (s->header.features & QED_F_NEED_CHECK) { + if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) { /* Read-only images cannot be fixed. There is no risk of corruption * since write operations are not possible. Therefore, allow * potentially inconsistent images to be opened read-only. This can diff --git a/qemu-img.c b/qemu-img.c index 94a31ad9f0..b41e670a61 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -379,7 +379,7 @@ static int img_check(int argc, char **argv) BlockDriverState *bs; BdrvCheckResult result; int fix = 0; - int flags = BDRV_O_FLAGS; + int flags = BDRV_O_FLAGS | BDRV_O_CHECK; fmt = NULL; for(;;) { diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out index 155a05e109..cb510d6716 100644 --- a/tests/qemu-iotests/039.out +++ b/tests/qemu-iotests/039.out @@ -26,6 +26,12 @@ incompatible_features 0x1 == Repairing the image file must succeed == ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0 Repairing cluster 5 refcount=0 reference=1 +The following inconsistencies were found and repaired: + + 0 leaked clusters + 1 corruptions + +Double checking the fixed image now... No errors were found on the image. incompatible_features 0x0 From 166f3c7b7026f9cd55a7daeec3b3444ec41092ab Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 9 Aug 2012 13:05:57 +0100 Subject: [PATCH 11/11] qemu-iotests: skip 039 with ./check -nocache When the qemu-io --nocache option is used the 039 test case cannot abort QEMU at a point where the image is dirty. Skip the test case. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/qemu-iotests/039 | 1 + tests/qemu-iotests/common.rc | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index a749fcf23b..c5ae806ecb 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -44,6 +44,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto generic _supported_os Linux +_unsupported_qemu_io_options --nocache size=128M diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 6b805161c8..d534e9466d 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -297,6 +297,20 @@ _supported_os() _notrun "not suitable for this OS: $HOSTOS" } +_unsupported_qemu_io_options() +{ + for bad_opt + do + for opt in $QEMU_IO_OPTIONS + do + if [ "$bad_opt" = "$opt" ] + then + _notrun "not suitable for qemu-io option: $bad_opt" + fi + done + done +} + # this test requires that a specified command (executable) exists # _require_command()