From 765d9df9626f45a821f221f7a46ef524354b3600 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 28 Sep 2017 14:13:25 -0500 Subject: [PATCH 01/54] block: Typo fix in copy_on_readv() Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/io.c b/block/io.c index 4378ae4c7d..d633b0f851 100644 --- a/block/io.c +++ b/block/io.c @@ -954,7 +954,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, /* FIXME We cannot require callers to have write permissions when all they * are doing is a read request. If we did things right, write permissions * would be obtained anyway, but internally by the copy-on-read code. As - * long as it is implemented here rather than in a separat filter driver, + * long as it is implemented here rather than in a separate filter driver, * the copy-on-read code doesn't have its own BdrvChild, however, for which * it could request permissions. Therefore we have to bypass the permission * system for the moment. */ From a8b42a1c09e751b9f921a1a73756411fc118020b Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:07 -0500 Subject: [PATCH 02/54] block: Make bdrv_img_create() size selection easier to read All callers of bdrv_img_create() pass in a size, or -1 to read the size from the backing file. We then set that size as the QemuOpt default, which means we will reuse that default rather than the final parameter to qemu_opt_get_size() several lines later. But it is rather confusing to read subsequent checks of 'size == -1' when it looks (without seeing the full context) like size defaults to 0; it also doesn't help that a size of 0 is valid (for some formats). Rework the logic to make things more legible. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index 5c65fac672..528cda7b2c 100644 --- a/block.c +++ b/block.c @@ -4488,7 +4488,7 @@ void bdrv_img_create(const char *filename, const char *fmt, /* The size for the image must always be specified, unless we have a backing * file and we have not been forbidden from opening it. */ - size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); + size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size); if (backing_file && !(flags & BDRV_O_NO_BACKING)) { BlockDriverState *bs; char *full_backing = g_new0(char, PATH_MAX); From ecbfa2817d61fd102d291487ec1e61571b3b6581 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:08 -0500 Subject: [PATCH 03/54] hbitmap: Rename serialization_granularity to serialization_align The only client of hbitmap_serialization_granularity() is dirty-bitmap's bdrv_dirty_bitmap_serialization_align(). Keeping the two names consistent is worthwhile, and the shorter name is more representative of what the function returns (the required alignment to be used for start/count of other serialization functions, where violating the alignment causes assertion failures). Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 2 +- include/qemu/hbitmap.h | 8 ++++---- tests/test-hbitmap.c | 10 +++++----- util/hbitmap.c | 8 ++++---- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 30462d4f9a..0490ca3aff 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -617,7 +617,7 @@ uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) { - return hbitmap_serialization_granularity(bitmap->bitmap); + return hbitmap_serialization_align(bitmap->bitmap); } void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index d3a74a21fc..81e78043d1 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -159,16 +159,16 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item); bool hbitmap_is_serializable(const HBitmap *hb); /** - * hbitmap_serialization_granularity: + * hbitmap_serialization_align: * @hb: HBitmap to operate on. * - * Granularity of serialization chunks, used by other serialization functions. - * For every chunk: + * Required alignment of serialization chunks, used by other serialization + * functions. For every chunk: * 1. Chunk start should be aligned to this granularity. * 2. Chunk size should be aligned too, except for last chunk (for which * start + count == hb->size) */ -uint64_t hbitmap_serialization_granularity(const HBitmap *hb); +uint64_t hbitmap_serialization_align(const HBitmap *hb); /** * hbitmap_serialization_size: diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c index 1acb353889..af41642346 100644 --- a/tests/test-hbitmap.c +++ b/tests/test-hbitmap.c @@ -738,15 +738,15 @@ static void test_hbitmap_meta_one(TestHBitmapData *data, const void *unused) } } -static void test_hbitmap_serialize_granularity(TestHBitmapData *data, - const void *unused) +static void test_hbitmap_serialize_align(TestHBitmapData *data, + const void *unused) { int r; hbitmap_test_init(data, L3 * 2, 3); g_assert(hbitmap_is_serializable(data->hb)); - r = hbitmap_serialization_granularity(data->hb); + r = hbitmap_serialization_align(data->hb); g_assert_cmpint(r, ==, 64 << 3); } @@ -974,8 +974,8 @@ int main(int argc, char **argv) hbitmap_test_add("/hbitmap/meta/word", test_hbitmap_meta_word); hbitmap_test_add("/hbitmap/meta/sector", test_hbitmap_meta_sector); - hbitmap_test_add("/hbitmap/serialize/granularity", - test_hbitmap_serialize_granularity); + hbitmap_test_add("/hbitmap/serialize/align", + test_hbitmap_serialize_align); hbitmap_test_add("/hbitmap/serialize/basic", test_hbitmap_serialize_basic); hbitmap_test_add("/hbitmap/serialize/part", diff --git a/util/hbitmap.c b/util/hbitmap.c index 21535cc90b..2f9d0fdbd0 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -413,14 +413,14 @@ bool hbitmap_is_serializable(const HBitmap *hb) { /* Every serialized chunk must be aligned to 64 bits so that endianness * requirements can be fulfilled on both 64 bit and 32 bit hosts. - * We have hbitmap_serialization_granularity() which converts this + * We have hbitmap_serialization_align() which converts this * alignment requirement from bitmap bits to items covered (e.g. sectors). * That value is: * 64 << hb->granularity * Since this value must not exceed UINT64_MAX, hb->granularity must be * less than 58 (== 64 - 6, where 6 is ld(64), i.e. 1 << 6 == 64). * - * In order for hbitmap_serialization_granularity() to always return a + * In order for hbitmap_serialization_align() to always return a * meaningful value, bitmaps that are to be serialized must have a * granularity of less than 58. */ @@ -437,7 +437,7 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item) return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0; } -uint64_t hbitmap_serialization_granularity(const HBitmap *hb) +uint64_t hbitmap_serialization_align(const HBitmap *hb) { assert(hbitmap_is_serializable(hb)); @@ -454,7 +454,7 @@ static void serialization_chunk(const HBitmap *hb, unsigned long **first_el, uint64_t *el_count) { uint64_t last = start + count - 1; - uint64_t gran = hbitmap_serialization_granularity(hb); + uint64_t gran = hbitmap_serialization_align(hb); assert((start & (gran - 1)) == 0); assert((last >> hb->granularity) < hb->size); From 113754f3a83dac7989ca94c4408a494560df1d73 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:09 -0500 Subject: [PATCH 04/54] qcow2: Ensure bitmap serialization is aligned When subdividing a bitmap serialization, the code in hbitmap.c enforces that start/count parameters are aligned (except that count can end early at end-of-bitmap). We exposed this required alignment through bdrv_dirty_bitmap_serialization_align(), but forgot to actually check that we comply with it. Fortunately, qcow2 is never dividing bitmap serialization smaller than one cluster (which is a minimum of 512 bytes); so we are always compliant with the serialization alignment (which insists that we partition at least 64 bits per chunk) because we are doing at least 4k bits per chunk. Still, it's safer to add an assertion (for the unlikely case that we'd ever support a cluster smaller than 512 bytes, or if the hbitmap implementation changes what it considers to be aligned), rather than leaving bdrv_dirty_bitmap_serialization_align() without a caller. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2-bitmap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 14f41d0427..1fc375b40a 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -274,10 +274,13 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb) static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s, const BdrvDirtyBitmap *bitmap) { - uint32_t sector_granularity = + uint64_t sector_granularity = bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; + uint64_t sbc = sector_granularity * (s->cluster_size << 3); - return (uint64_t)sector_granularity * (s->cluster_size << 3); + assert(QEMU_IS_ALIGNED(sbc, + bdrv_dirty_bitmap_serialization_align(bitmap))); + return sbc; } /* load_bitmap_data From dfe55c35772b7ebc6f282a94d4048c9f85b7b26b Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:10 -0500 Subject: [PATCH 05/54] dirty-bitmap: Drop unused functions We had several functions that no one is currently using, and which use sector-based interfaces. I'm trying to convert towards byte-based interfaces, so it's easier to just drop the unused functions: bdrv_dirty_bitmap_get_meta bdrv_dirty_bitmap_get_meta_locked bdrv_dirty_bitmap_reset_meta bdrv_dirty_bitmap_meta_granularity Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 44 ------------------------------------ include/block/dirty-bitmap.h | 10 -------- 2 files changed, 54 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 0490ca3aff..42a55e4a4b 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -173,45 +173,6 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) qemu_mutex_unlock(bitmap->mutex); } -int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors) -{ - uint64_t i; - int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta); - - /* To optimize: we can make hbitmap to internally check the range in a - * coarse level, or at least do it word by word. */ - for (i = sector; i < sector + nb_sectors; i += sectors_per_bit) { - if (hbitmap_get(bitmap->meta, i)) { - return true; - } - } - return false; -} - -int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors) -{ - bool dirty; - - qemu_mutex_lock(bitmap->mutex); - dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors); - qemu_mutex_unlock(bitmap->mutex); - - return dirty; -} - -void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors) -{ - qemu_mutex_lock(bitmap->mutex); - hbitmap_reset(bitmap->meta, sector, nb_sectors); - qemu_mutex_unlock(bitmap->mutex); -} - int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) { return bitmap->size; @@ -511,11 +472,6 @@ uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap) return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } -uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap) -{ - return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->meta); -} - BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector) { diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index a79a58d2c3..8fd842eac9 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -34,7 +34,6 @@ void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap); -uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap); @@ -44,15 +43,6 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); -int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors); -int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors); -void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors); BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap); BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector); From 1b6cc579ded4f9d7923d9d59147512edd623f4c0 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:11 -0500 Subject: [PATCH 06/54] dirty-bitmap: Avoid size query failure during truncate We've previously fixed several places where we failed to account for possible errors from bdrv_nb_sectors(). Fix another one by making bdrv_dirty_bitmap_truncate() take the new size from the caller instead of querying itself; then adjust the sole caller bdrv_truncate() to pass the size just determined by a successful resize, or to reuse the size given to the original truncate operation when refresh_total_sectors() was not able to confirm the actual size (the two sizes can potentially differ according to rounding constraints), thus avoiding sizing the bitmaps to -1. This also fixes a bug where not all failure paths in bdrv_truncate() would set errp. Note that bdrv_truncate() is still a bit awkward. We may want to revisit it later and clean up things to better guarantee that a resize attempt either fails cleanly up front, or cannot fail after guest-visible changes have been made (if temporary changes are made, then they need to be cleanly rolled back). But that is a task for another day; for now, the goal is the bare minimum fix to ensure that just bdrv_dirty_bitmap_truncate() cannot fail. Signed-off-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: John Snow Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block.c | 16 +++++++++++----- block/dirty-bitmap.c | 6 +++--- include/block/dirty-bitmap.h | 2 +- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/block.c b/block.c index 528cda7b2c..ef5af81f66 100644 --- a/block.c +++ b/block.c @@ -3545,12 +3545,18 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, assert(!(bs->open_flags & BDRV_O_INACTIVE)); ret = drv->bdrv_truncate(bs, offset, prealloc, errp); - if (ret == 0) { - ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); - bdrv_dirty_bitmap_truncate(bs); - bdrv_parent_cb_resize(bs); - atomic_inc(&bs->write_gen); + if (ret < 0) { + return ret; } + ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not refresh total sector count"); + } else { + offset = bs->total_sectors * BDRV_SECTOR_SIZE; + } + bdrv_dirty_bitmap_truncate(bs, offset); + bdrv_parent_cb_resize(bs); + atomic_inc(&bs->write_gen); return ret; } diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 42a55e4a4b..ee164fb518 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -1,7 +1,7 @@ /* * Block Dirty Bitmap * - * Copyright (c) 2016 Red Hat. Inc + * Copyright (c) 2016-2017 Red Hat. Inc * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -302,10 +302,10 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, * Truncates _all_ bitmaps attached to a BDS. * Called with BQL taken. */ -void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes) { BdrvDirtyBitmap *bitmap; - uint64_t size = bdrv_nb_sectors(bs); + int64_t size = DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE); bdrv_dirty_bitmaps_lock(bs); QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 8fd842eac9..7a27590047 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -83,7 +83,7 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); -void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes); bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap); bool bdrv_has_readonly_bitmaps(BlockDriverState *bs); bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap); From ebfcd2e75f719c5d74ba72bbca84fa9854b6698f Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:12 -0500 Subject: [PATCH 07/54] dirty-bitmap: Change bdrv_dirty_bitmap_size() to report bytes We're already reporting bytes for bdrv_dirty_bitmap_granularity(); mixing bytes and sectors in our return values is a recipe for confusion. A later cleanup will convert dirty bitmap internals to be entirely byte-based, but in the meantime, we should report the bitmap size in bytes. The only external caller in qcow2-bitmap.c is temporarily more verbose (because it is still using sector-based math), but will later be switched to track progress by bytes instead of sectors. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 2 +- block/qcow2-bitmap.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index ee164fb518..755555af32 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -175,7 +175,7 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) { - return bitmap->size; + return bitmap->size * BDRV_SECTOR_SIZE; } const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 1fc375b40a..9c185b5202 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -295,10 +295,11 @@ static int load_bitmap_data(BlockDriverState *bs, BDRVQcow2State *s = bs->opaque; uint64_t sector, sbc; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); + uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); uint8_t *buf = NULL; uint64_t i, tab_size = size_to_clusters(s, - bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_sectors)); if (tab_size != bitmap_table_size || tab_size > BME_MAX_TABLE_SIZE) { return -EINVAL; @@ -307,7 +308,7 @@ static int load_bitmap_data(BlockDriverState *bs, buf = g_malloc(s->cluster_size); sbc = sectors_covered_by_bitmap_cluster(s, bitmap); for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) { - uint64_t count = MIN(bm_size - sector, sbc); + uint64_t count = MIN(bm_sectors - sector, sbc); uint64_t entry = bitmap_table[i]; uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; @@ -1077,13 +1078,14 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, int64_t sector; uint64_t sbc; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); + uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); const char *bm_name = bdrv_dirty_bitmap_name(bitmap); uint8_t *buf = NULL; BdrvDirtyBitmapIter *dbi; uint64_t *tb; uint64_t tb_size = size_to_clusters(s, - bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_sectors)); if (tb_size > BME_MAX_TABLE_SIZE || tb_size * s->cluster_size > BME_MAX_PHYS_SIZE) @@ -1101,7 +1103,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, dbi = bdrv_dirty_iter_new(bitmap, 0); buf = g_malloc(s->cluster_size); sbc = sectors_covered_by_bitmap_cluster(s, bitmap); - assert(DIV_ROUND_UP(bm_size, sbc) == tb_size); + assert(DIV_ROUND_UP(bm_sectors, sbc) == tb_size); while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { uint64_t cluster = sector / sbc; @@ -1109,7 +1111,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, int64_t off; sector = cluster * sbc; - end = MIN(bm_size, sector + sbc); + end = MIN(bm_sectors, sector + sbc); write_size = bdrv_dirty_bitmap_serialization_size(bitmap, sector, end - sector); assert(write_size <= s->cluster_size); @@ -1141,7 +1143,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, goto fail; } - if (end >= bm_size) { + if (end >= bm_sectors) { break; } From 993e6525bfcc67ba48fe55bd64ec043a4b721e1d Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:13 -0500 Subject: [PATCH 08/54] dirty-bitmap: Track bitmap size by bytes We are still using an internal hbitmap that tracks a size in sectors, with the granularity scaled down accordingly, because it lets us use a shortcut for our iterators which are currently sector-based. But there's no reason we can't track the dirty bitmap size in bytes, since it is (mostly) an internal-only variable (remember, the size is how many bytes are covered by the bitmap, not how many bytes the bitmap occupies). A later cleanup will convert dirty bitmap internals to be entirely byte-based, eliminating the intermediate sector rounding added here; and technically, since bdrv_getlength() already rounds up to sectors, our use of DIV_ROUND_UP is more for theoretical completeness than for any actual rounding. Use is_power_of_2() while at it, instead of open-coding that. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 755555af32..6d32554b4e 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -42,7 +42,7 @@ struct BdrvDirtyBitmap { HBitmap *meta; /* Meta dirty bitmap */ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ char *name; /* Optional non-empty unique ID */ - int64_t size; /* Size of the bitmap (Number of sectors) */ + int64_t size; /* Size of the bitmap, in bytes */ bool disabled; /* Bitmap is disabled. It ignores all writes to the device */ int active_iterators; /* How many iterators are active */ @@ -115,17 +115,14 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, { int64_t bitmap_size; BdrvDirtyBitmap *bitmap; - uint32_t sector_granularity; - assert((granularity & (granularity - 1)) == 0); + assert(is_power_of_2(granularity) && granularity >= BDRV_SECTOR_SIZE); if (name && bdrv_find_dirty_bitmap(bs, name)) { error_setg(errp, "Bitmap already exists: %s", name); return NULL; } - sector_granularity = granularity >> BDRV_SECTOR_BITS; - assert(sector_granularity); - bitmap_size = bdrv_nb_sectors(bs); + bitmap_size = bdrv_getlength(bs); if (bitmap_size < 0) { error_setg_errno(errp, -bitmap_size, "could not get length of device"); errno = -bitmap_size; @@ -133,7 +130,12 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, } bitmap = g_new0(BdrvDirtyBitmap, 1); bitmap->mutex = &bs->dirty_bitmap_mutex; - bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); + /* + * TODO - let hbitmap track full granularity. For now, it is tracking + * only sector granularity, as a shortcut for our iterators. + */ + bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap_size, BDRV_SECTOR_SIZE), + ctz32(granularity) - BDRV_SECTOR_BITS); bitmap->size = bitmap_size; bitmap->name = g_strdup(name); bitmap->disabled = false; @@ -175,7 +177,7 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) { - return bitmap->size * BDRV_SECTOR_SIZE; + return bitmap->size; } const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap) @@ -305,14 +307,13 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes) { BdrvDirtyBitmap *bitmap; - int64_t size = DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE); bdrv_dirty_bitmaps_lock(bs); QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); assert(!bitmap->active_iterators); - hbitmap_truncate(bitmap->bitmap, size); - bitmap->size = size; + hbitmap_truncate(bitmap->bitmap, DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE)); + bitmap->size = bytes; } bdrv_dirty_bitmaps_unlock(bs); } @@ -549,7 +550,8 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) hbitmap_reset_all(bitmap->bitmap); } else { HBitmap *backup = bitmap->bitmap; - bitmap->bitmap = hbitmap_alloc(bitmap->size, + bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap->size, + BDRV_SECTOR_SIZE), hbitmap_granularity(backup)); *out = backup; } From 86f6ae67e157362f3b141649874213ce01dcc622 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:14 -0500 Subject: [PATCH 09/54] dirty-bitmap: Change bdrv_dirty_bitmap_*serialize*() to take bytes Right now, the dirty-bitmap code exposes the fact that we use a scale of sector granularity in the underlying hbitmap to anything that wants to serialize a dirty bitmap. It's nicer to uniformly expose bytes as our dirty-bitmap interface, matching the previous change to bitmap size. The only caller to serialization is currently qcow2-cluster.c, which becomes a bit more verbose because it is still tracking sectors for other reasons, but a later patch will fix that to more uniformly use byte offsets everywhere. Likewise, within dirty-bitmap, we have to add more assertions that we are not truncating incorrectly, which can go away once the internal hbitmap is byte-based rather than sector-based. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 37 +++++++++++++++++++++++------------- block/qcow2-bitmap.c | 22 +++++++++++++-------- include/block/dirty-bitmap.h | 14 +++++++------- 3 files changed, 45 insertions(+), 28 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 6d32554b4e..555c736024 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -568,42 +568,53 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) } uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count) + uint64_t offset, uint64_t bytes) { - return hbitmap_serialization_size(bitmap->bitmap, start, count); + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + return hbitmap_serialization_size(bitmap->bitmap, + offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS); } uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) { - return hbitmap_serialization_align(bitmap->bitmap); + return hbitmap_serialization_align(bitmap->bitmap) * BDRV_SECTOR_SIZE; } void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, - uint8_t *buf, uint64_t start, - uint64_t count) + uint8_t *buf, uint64_t offset, + uint64_t bytes) { - hbitmap_serialize_part(bitmap->bitmap, buf, start, count); + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + hbitmap_serialize_part(bitmap->bitmap, buf, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS); } void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, - uint8_t *buf, uint64_t start, - uint64_t count, bool finish) + uint8_t *buf, uint64_t offset, + uint64_t bytes, bool finish) { - hbitmap_deserialize_part(bitmap->bitmap, buf, start, count, finish); + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + hbitmap_deserialize_part(bitmap->bitmap, buf, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, finish); } void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count, + uint64_t offset, uint64_t bytes, bool finish) { - hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish); + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + hbitmap_deserialize_zeroes(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, finish); } void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count, + uint64_t offset, uint64_t bytes, bool finish) { - hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish); + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + hbitmap_deserialize_ones(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, finish); } void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 9c185b5202..9e799996e6 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -278,7 +278,7 @@ static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s, bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; uint64_t sbc = sector_granularity * (s->cluster_size << 3); - assert(QEMU_IS_ALIGNED(sbc, + assert(QEMU_IS_ALIGNED(sbc << BDRV_SECTOR_BITS, bdrv_dirty_bitmap_serialization_align(bitmap))); return sbc; } @@ -299,7 +299,7 @@ static int load_bitmap_data(BlockDriverState *bs, uint8_t *buf = NULL; uint64_t i, tab_size = size_to_clusters(s, - bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_sectors)); + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); if (tab_size != bitmap_table_size || tab_size > BME_MAX_TABLE_SIZE) { return -EINVAL; @@ -316,7 +316,9 @@ static int load_bitmap_data(BlockDriverState *bs, if (offset == 0) { if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) { - bdrv_dirty_bitmap_deserialize_ones(bitmap, sector, count, + bdrv_dirty_bitmap_deserialize_ones(bitmap, + sector * BDRV_SECTOR_SIZE, + count * BDRV_SECTOR_SIZE, false); } else { /* No need to deserialize zeros because the dirty bitmap is @@ -327,7 +329,9 @@ static int load_bitmap_data(BlockDriverState *bs, if (ret < 0) { goto finish; } - bdrv_dirty_bitmap_deserialize_part(bitmap, buf, sector, count, + bdrv_dirty_bitmap_deserialize_part(bitmap, buf, + sector * BDRV_SECTOR_SIZE, + count * BDRV_SECTOR_SIZE, false); } } @@ -1085,7 +1089,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, uint64_t *tb; uint64_t tb_size = size_to_clusters(s, - bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_sectors)); + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); if (tb_size > BME_MAX_TABLE_SIZE || tb_size * s->cluster_size > BME_MAX_PHYS_SIZE) @@ -1112,8 +1116,8 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, sector = cluster * sbc; end = MIN(bm_sectors, sector + sbc); - write_size = - bdrv_dirty_bitmap_serialization_size(bitmap, sector, end - sector); + write_size = bdrv_dirty_bitmap_serialization_size(bitmap, + sector * BDRV_SECTOR_SIZE, (end - sector) * BDRV_SECTOR_SIZE); assert(write_size <= s->cluster_size); off = qcow2_alloc_clusters(bs, s->cluster_size); @@ -1125,7 +1129,9 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, } tb[cluster] = off; - bdrv_dirty_bitmap_serialize_part(bitmap, buf, sector, end - sector); + bdrv_dirty_bitmap_serialize_part(bitmap, buf, + sector * BDRV_SECTOR_SIZE, + (end - sector) * BDRV_SECTOR_SIZE); if (write_size < s->cluster_size) { memset(buf + write_size, 0, s->cluster_size - write_size); } diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 7a27590047..5f34a1a3c7 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -49,19 +49,19 @@ BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count); + uint64_t offset, uint64_t bytes); uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, - uint8_t *buf, uint64_t start, - uint64_t count); + uint8_t *buf, uint64_t offset, + uint64_t bytes); void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, - uint8_t *buf, uint64_t start, - uint64_t count, bool finish); + uint8_t *buf, uint64_t offset, + uint64_t bytes, bool finish); void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count, + uint64_t offset, uint64_t bytes, bool finish); void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count, + uint64_t offset, uint64_t bytes, bool finish); void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap); From c7e7c87ac8e03fe088f4b1f820da30e85a07fd6b Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:15 -0500 Subject: [PATCH 10/54] qcow2: Switch sectors_covered_by_bitmap_cluster() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Change the qcow2 bitmap helper function sectors_covered_by_bitmap_cluster(), renaming it to bytes_covered_by_bitmap_cluster() in the process. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2-bitmap.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 9e799996e6..8324468b16 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -269,18 +269,16 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb) return 0; } -/* This function returns the number of disk sectors covered by a single qcow2 - * cluster of bitmap data. */ -static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s, - const BdrvDirtyBitmap *bitmap) +/* Return the disk size covered by a single qcow2 cluster of bitmap data. */ +static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s, + const BdrvDirtyBitmap *bitmap) { - uint64_t sector_granularity = - bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; - uint64_t sbc = sector_granularity * (s->cluster_size << 3); + uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap); + uint64_t limit = granularity * (s->cluster_size << 3); - assert(QEMU_IS_ALIGNED(sbc << BDRV_SECTOR_BITS, + assert(QEMU_IS_ALIGNED(limit, bdrv_dirty_bitmap_serialization_align(bitmap))); - return sbc; + return limit; } /* load_bitmap_data @@ -293,7 +291,7 @@ static int load_bitmap_data(BlockDriverState *bs, { int ret = 0; BDRVQcow2State *s = bs->opaque; - uint64_t sector, sbc; + uint64_t sector, limit, sbc; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); uint8_t *buf = NULL; @@ -306,7 +304,8 @@ static int load_bitmap_data(BlockDriverState *bs, } buf = g_malloc(s->cluster_size); - sbc = sectors_covered_by_bitmap_cluster(s, bitmap); + limit = bytes_covered_by_bitmap_cluster(s, bitmap); + sbc = limit >> BDRV_SECTOR_BITS; for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) { uint64_t count = MIN(bm_sectors - sector, sbc); uint64_t entry = bitmap_table[i]; @@ -1080,7 +1079,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, int ret; BDRVQcow2State *s = bs->opaque; int64_t sector; - uint64_t sbc; + uint64_t limit, sbc; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); const char *bm_name = bdrv_dirty_bitmap_name(bitmap); @@ -1106,8 +1105,9 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, dbi = bdrv_dirty_iter_new(bitmap, 0); buf = g_malloc(s->cluster_size); - sbc = sectors_covered_by_bitmap_cluster(s, bitmap); - assert(DIV_ROUND_UP(bm_sectors, sbc) == tb_size); + limit = bytes_covered_by_bitmap_cluster(s, bitmap); + sbc = limit >> BDRV_SECTOR_BITS; + assert(DIV_ROUND_UP(bm_size, limit) == tb_size); while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { uint64_t cluster = sector / sbc; From 715a74d819926af38bfeddb3ae29c9fe6b7736bb Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:16 -0500 Subject: [PATCH 11/54] dirty-bitmap: Set iterator start by offset, not sector All callers to bdrv_dirty_iter_new() passed 0 for their initial starting point, drop that parameter. Most callers to bdrv_set_dirty_iter() were scaling a byte offset to a sector number; the exception qcow2-bitmap will be converted later to use byte rather than sector iteration. Move the scaling to occur internally to dirty bitmap code instead, so that callers now pass in bytes. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/backup.c | 5 ++--- block/dirty-bitmap.c | 9 ++++----- block/mirror.c | 4 ++-- block/qcow2-bitmap.c | 4 ++-- include/block/dirty-bitmap.h | 5 ++--- 5 files changed, 12 insertions(+), 15 deletions(-) diff --git a/block/backup.c b/block/backup.c index 517c300a49..ac9c018717 100644 --- a/block/backup.c +++ b/block/backup.c @@ -372,7 +372,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap); clusters_per_iter = MAX((granularity / job->cluster_size), 1); - dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0); + dbi = bdrv_dirty_iter_new(job->sync_bitmap); /* Find the next dirty sector(s) */ while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) { @@ -403,8 +403,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) /* If the bitmap granularity is smaller than the backup granularity, * we need to advance the iterator pointer to the next cluster. */ if (granularity < job->cluster_size) { - bdrv_set_dirty_iter(dbi, - cluster * job->cluster_size / BDRV_SECTOR_SIZE); + bdrv_set_dirty_iter(dbi, cluster * job->cluster_size); } last_cluster = cluster - 1; diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 555c736024..84509476ba 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -473,11 +473,10 @@ uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap) return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } -BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, - uint64_t first_sector) +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap) { BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1); - hbitmap_iter_init(&iter->hbi, bitmap->bitmap, first_sector); + hbitmap_iter_init(&iter->hbi, bitmap->bitmap, 0); iter->bitmap = bitmap; bitmap->active_iterators++; return iter; @@ -645,9 +644,9 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, /** * Advance a BdrvDirtyBitmapIter to an arbitrary offset. */ -void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t sector_num) +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset) { - hbitmap_iter_init(&iter->hbi, iter->hbi.hb, sector_num); + hbitmap_iter_init(&iter->hbi, iter->hbi.hb, offset >> BDRV_SECTOR_BITS); } int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) diff --git a/block/mirror.c b/block/mirror.c index 6f5cb9f26c..0c705e0b4f 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -373,7 +373,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; if (next_dirty > next_offset || next_dirty < 0) { /* The bitmap iterator's cache is stale, refresh it */ - bdrv_set_dirty_iter(s->dbi, next_offset >> BDRV_SECTOR_BITS); + bdrv_set_dirty_iter(s->dbi, next_offset); next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; } assert(next_dirty == next_offset); @@ -796,7 +796,7 @@ static void coroutine_fn mirror_run(void *opaque) } assert(!s->dbi); - s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap, 0); + s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap); for (;;) { uint64_t delay_ns = 0; int64_t cnt, delta; diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 8324468b16..90756cf561 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -1103,7 +1103,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, return NULL; } - dbi = bdrv_dirty_iter_new(bitmap, 0); + dbi = bdrv_dirty_iter_new(bitmap); buf = g_malloc(s->cluster_size); limit = bytes_covered_by_bitmap_cluster(s, bitmap); sbc = limit >> BDRV_SECTOR_BITS; @@ -1153,7 +1153,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, break; } - bdrv_set_dirty_iter(dbi, end); + bdrv_set_dirty_iter(dbi, end * BDRV_SECTOR_SIZE); } *bitmap_table_size = tb_size; diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 5f34a1a3c7..757fc4c5b8 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -44,8 +44,7 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap); -BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, - uint64_t first_sector); +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap); void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, @@ -80,7 +79,7 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); -void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes); From f798184cfdcb7f92a38c5f717d675bd75e1fd3ac Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:17 -0500 Subject: [PATCH 12/54] dirty-bitmap: Change bdrv_dirty_iter_next() to report byte offset Thanks to recent cleanups, most callers were scaling a return value of sectors into bytes (the exception, in qcow2-bitmap, will be converted to byte-based iteration later). Update the interface to do the scaling internally instead. In qcow2-bitmap, the code was specifically checking for an error return of -1. To avoid a regression, we either have to make sure we continue to return -1 (rather than a scaled -512) on error, or we have to fix the caller to treat all negative values as error rather than just one magic value. It's easy enough to make both changes at the same time, even though either one in isolation would work. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/backup.c | 2 +- block/dirty-bitmap.c | 3 ++- block/mirror.c | 8 ++++---- block/qcow2-bitmap.c | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/block/backup.c b/block/backup.c index ac9c018717..06ddbfd03d 100644 --- a/block/backup.c +++ b/block/backup.c @@ -375,7 +375,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) dbi = bdrv_dirty_iter_new(job->sync_bitmap); /* Find the next dirty sector(s) */ - while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) { + while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) { cluster = offset / job->cluster_size; /* Fake progress updates for any clusters we skipped */ diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 84509476ba..e451916187 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -503,7 +503,8 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) { - return hbitmap_iter_next(&iter->hbi); + int64_t ret = hbitmap_iter_next(&iter->hbi); + return ret < 0 ? -1 : ret * BDRV_SECTOR_SIZE; } /* Called within bdrv_dirty_bitmap_lock..unlock */ diff --git a/block/mirror.c b/block/mirror.c index 0c705e0b4f..de0a02778c 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -336,10 +336,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); bdrv_dirty_bitmap_lock(s->dirty_bitmap); - offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; + offset = bdrv_dirty_iter_next(s->dbi); if (offset < 0) { bdrv_set_dirty_iter(s->dbi, 0); - offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; + offset = bdrv_dirty_iter_next(s->dbi); trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) * BDRV_SECTOR_SIZE); assert(offset >= 0); @@ -370,11 +370,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) break; } - next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; + next_dirty = bdrv_dirty_iter_next(s->dbi); if (next_dirty > next_offset || next_dirty < 0) { /* The bitmap iterator's cache is stale, refresh it */ bdrv_set_dirty_iter(s->dbi, next_offset); - next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; + next_dirty = bdrv_dirty_iter_next(s->dbi); } assert(next_dirty == next_offset); nb_chunks++; diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 90756cf561..2d8dcba3e8 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -1109,7 +1109,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, sbc = limit >> BDRV_SECTOR_BITS; assert(DIV_ROUND_UP(bm_size, limit) == tb_size); - while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { + while ((sector = bdrv_dirty_iter_next(dbi) >> BDRV_SECTOR_BITS) >= 0) { uint64_t cluster = sector / sbc; uint64_t end, write_size; int64_t off; From 9a46dba7b76f5198555819905d1d8235947ba98f Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:18 -0500 Subject: [PATCH 13/54] dirty-bitmap: Change bdrv_get_dirty_count() to report bytes Thanks to recent cleanups, all callers were scaling a return value of sectors into bytes; do the scaling internally instead. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 4 ++-- block/mirror.c | 16 ++++++---------- migration/block.c | 2 +- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index e451916187..8322e23f0d 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -423,7 +423,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); - info->count = bdrv_get_dirty_count(bm) << BDRV_SECTOR_BITS; + info->count = bdrv_get_dirty_count(bm); info->granularity = bdrv_dirty_bitmap_granularity(bm); info->has_name = !!bm->name; info->name = g_strdup(bm->name); @@ -652,7 +652,7 @@ void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset) int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) { - return hbitmap_count(bitmap->bitmap); + return hbitmap_count(bitmap->bitmap) << BDRV_SECTOR_BITS; } int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap) diff --git a/block/mirror.c b/block/mirror.c index de0a02778c..c5212d2c8d 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -340,8 +340,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) if (offset < 0) { bdrv_set_dirty_iter(s->dbi, 0); offset = bdrv_dirty_iter_next(s->dbi); - trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) * - BDRV_SECTOR_SIZE); + trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); assert(offset >= 0); } bdrv_dirty_bitmap_unlock(s->dirty_bitmap); @@ -811,11 +810,10 @@ static void coroutine_fn mirror_run(void *opaque) cnt = bdrv_get_dirty_count(s->dirty_bitmap); /* s->common.offset contains the number of bytes already processed so - * far, cnt is the number of dirty sectors remaining and + * far, cnt is the number of dirty bytes remaining and * s->bytes_in_flight is the number of bytes currently being * processed; together those are the current total operation length */ - s->common.len = s->common.offset + s->bytes_in_flight + - cnt * BDRV_SECTOR_SIZE; + s->common.len = s->common.offset + s->bytes_in_flight + cnt; /* Note that even when no rate limit is applied we need to yield * periodically with no pending I/O so that bdrv_drain_all() returns. @@ -827,8 +825,7 @@ static void coroutine_fn mirror_run(void *opaque) s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { - trace_mirror_yield(s, cnt * BDRV_SECTOR_SIZE, - s->buf_free_count, s->in_flight); + trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); mirror_wait_for_io(s); continue; } else if (cnt != 0) { @@ -869,7 +866,7 @@ static void coroutine_fn mirror_run(void *opaque) * whether to switch to target check one last time if I/O has * come in the meanwhile, and if not flush the data to disk. */ - trace_mirror_before_drain(s, cnt * BDRV_SECTOR_SIZE); + trace_mirror_before_drain(s, cnt); bdrv_drained_begin(bs); cnt = bdrv_get_dirty_count(s->dirty_bitmap); @@ -888,8 +885,7 @@ static void coroutine_fn mirror_run(void *opaque) } ret = 0; - trace_mirror_before_sleep(s, cnt * BDRV_SECTOR_SIZE, - s->synced, delay_ns); + trace_mirror_before_sleep(s, cnt, s->synced, delay_ns); if (!s->synced) { block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/migration/block.c b/migration/block.c index 606ad4db92..cbcadce168 100644 --- a/migration/block.c +++ b/migration/block.c @@ -672,7 +672,7 @@ static int64_t get_remaining_dirty(void) aio_context_release(blk_get_aio_context(bmds->blk)); } - return dirty << BDRV_SECTOR_BITS; + return dirty; } From 3b5d4df0c6b52746c6194bd2ea65828822db8438 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:19 -0500 Subject: [PATCH 14/54] dirty-bitmap: Change bdrv_get_dirty_locked() to take bytes Half the callers were already scaling bytes to sectors; the other half can eventually be simplified to use byte iteration. Both callers were already using the result as a bool, so make that explicit. Making the change also makes it easier for a future dirty-bitmap patch to offload scaling over to the internal hbitmap. Remember, asking whether a byte is dirty is effectively asking whether the entire granularity containing the byte is dirty, since we only track dirtiness by granularity. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Juan Quintela Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 8 ++++---- block/mirror.c | 3 +-- include/block/dirty-bitmap.h | 4 ++-- migration/block.c | 3 ++- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 8322e23f0d..ad559c62b1 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -438,13 +438,13 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) } /* Called within bdrv_dirty_bitmap_lock..unlock */ -int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, - int64_t sector) +bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t offset) { if (bitmap) { - return hbitmap_get(bitmap->bitmap, sector); + return hbitmap_get(bitmap->bitmap, offset >> BDRV_SECTOR_BITS); } else { - return 0; + return false; } } diff --git a/block/mirror.c b/block/mirror.c index c5212d2c8d..545dfc50aa 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -361,8 +361,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int64_t next_offset = offset + nb_chunks * s->granularity; int64_t next_chunk = next_offset / s->granularity; if (next_offset >= s->bdev_length || - !bdrv_get_dirty_locked(source, s->dirty_bitmap, - next_offset >> BDRV_SECTOR_BITS)) { + !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_offset)) { break; } if (test_bit(next_chunk, s->in_flight_bitmap)) { diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 757fc4c5b8..9e39537e4b 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -72,8 +72,8 @@ void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, /* Functions that require manual locking. */ void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap); -int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, - int64_t sector); +bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t offset); void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, diff --git a/migration/block.c b/migration/block.c index cbcadce168..f5bcf5505c 100644 --- a/migration/block.c +++ b/migration/block.c @@ -535,7 +535,8 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, blk_mig_unlock(); } bdrv_dirty_bitmap_lock(bmds->dirty_bitmap); - if (bdrv_get_dirty_locked(bs, bmds->dirty_bitmap, sector)) { + if (bdrv_get_dirty_locked(bs, bmds->dirty_bitmap, + sector * BDRV_SECTOR_SIZE)) { if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) { nr_sectors = total_sectors - sector; } else { From e0d7f73e63427521aabd7311a86fe90fd02897c0 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:20 -0500 Subject: [PATCH 15/54] dirty-bitmap: Change bdrv_[re]set_dirty_bitmap() to use bytes Some of the callers were already scaling bytes to sectors; others can be easily converted to pass byte offsets, all in our shift towards a consistent byte interface everywhere. Making the change will also make it easier to write the hold-out callers to use byte rather than sectors for their iterations; it also makes it easier for a future dirty-bitmap patch to offload scaling over to the internal hbitmap. Although all callers happen to pass sector-aligned values, make the internal scaling robust to any sub-sector requests. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 22 ++++++++++++++-------- block/mirror.c | 16 ++++++++-------- include/block/dirty-bitmap.h | 8 ++++---- migration/block.c | 7 +++++-- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index ad559c62b1..117837b3cc 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -509,35 +509,41 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) /* Called within bdrv_dirty_bitmap_lock..unlock */ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + assert(bdrv_dirty_bitmap_enabled(bitmap)); assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, + end_sector - (offset >> BDRV_SECTOR_BITS)); } void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { bdrv_dirty_bitmap_lock(bitmap); - bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors); + bdrv_set_dirty_bitmap_locked(bitmap, offset, bytes); bdrv_dirty_bitmap_unlock(bitmap); } /* Called within bdrv_dirty_bitmap_lock..unlock */ void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + assert(bdrv_dirty_bitmap_enabled(bitmap)); assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); + hbitmap_reset(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, + end_sector - (offset >> BDRV_SECTOR_BITS)); } void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { bdrv_dirty_bitmap_lock(bitmap); - bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors); + bdrv_reset_dirty_bitmap_locked(bitmap, offset, bytes); bdrv_dirty_bitmap_unlock(bitmap); } diff --git a/block/mirror.c b/block/mirror.c index 545dfc50aa..5c0f79f56a 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -141,8 +141,7 @@ static void mirror_write_complete(void *opaque, int ret) if (ret < 0) { BlockErrorAction action; - bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS, - op->bytes >> BDRV_SECTOR_BITS); + bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes); action = mirror_error_action(s, false, -ret); if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { s->ret = ret; @@ -161,8 +160,7 @@ static void mirror_read_complete(void *opaque, int ret) if (ret < 0) { BlockErrorAction action; - bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS, - op->bytes >> BDRV_SECTOR_BITS); + bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes); action = mirror_error_action(s, true, -ret); if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { s->ret = ret; @@ -382,8 +380,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) * calling bdrv_get_block_status_above could yield - if some blocks are * marked dirty in this window, we need to know. */ - bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset >> BDRV_SECTOR_BITS, - nb_chunks * sectors_per_chunk); + bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset, + nb_chunks * s->granularity); bdrv_dirty_bitmap_unlock(s->dirty_bitmap); bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks); @@ -625,7 +623,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) if (base == NULL && !bdrv_has_zero_init(target_bs)) { if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end); + bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); return 0; } @@ -681,7 +679,9 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) n = count >> BDRV_SECTOR_BITS; assert(n > 0); if (ret == 1) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); + bdrv_set_dirty_bitmap(s->dirty_bitmap, + sector_num * BDRV_SECTOR_SIZE, + n * BDRV_SECTOR_SIZE); } sector_num += n; } diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 9e39537e4b..3579a7597c 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -40,9 +40,9 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap); int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap); DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors); + int64_t offset, int64_t bytes); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors); + int64_t offset, int64_t bytes); BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap); BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap); void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); @@ -75,9 +75,9 @@ void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap); bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t offset); void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors); + int64_t offset, int64_t bytes); void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors); + int64_t offset, int64_t bytes); int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); diff --git a/migration/block.c b/migration/block.c index f5bcf5505c..3282809583 100644 --- a/migration/block.c +++ b/migration/block.c @@ -334,7 +334,8 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) blk->aiocb = blk_aio_preadv(bb, cur_sector * BDRV_SECTOR_SIZE, &blk->qiov, 0, blk_mig_read_cb, blk); - bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors); + bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector * BDRV_SECTOR_SIZE, + nr_sectors * BDRV_SECTOR_SIZE); aio_context_release(blk_get_aio_context(bmds->blk)); qemu_mutex_unlock_iothread(); @@ -542,7 +543,9 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, } else { nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK; } - bdrv_reset_dirty_bitmap_locked(bmds->dirty_bitmap, sector, nr_sectors); + bdrv_reset_dirty_bitmap_locked(bmds->dirty_bitmap, + sector * BDRV_SECTOR_SIZE, + nr_sectors * BDRV_SECTOR_SIZE); bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap); blk = g_new(BlkMigBlock, 1); From 23ca459a455c83ffadb03ab1eedf0b6cff62bfeb Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:21 -0500 Subject: [PATCH 16/54] mirror: Switch mirror_dirty_init() to byte-based iteration Now that we have adjusted the majority of the calls this function makes to be byte-based, it is easier to read the code if it makes passes over the image using bytes rather than sectors. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/mirror.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 5c0f79f56a..459b80f8f3 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -612,15 +612,13 @@ static void mirror_throttle(MirrorBlockJob *s) static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) { - int64_t sector_num, end; + int64_t offset; BlockDriverState *base = s->base; BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); - int ret, n; + int ret; int64_t count; - end = s->bdev_length / BDRV_SECTOR_SIZE; - if (base == NULL && !bdrv_has_zero_init(target_bs)) { if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); @@ -628,9 +626,9 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) } s->initial_zeroing_ongoing = true; - for (sector_num = 0; sector_num < end; ) { - int nb_sectors = MIN(end - sector_num, - QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS); + for (offset = 0; offset < s->bdev_length; ) { + int bytes = MIN(s->bdev_length - offset, + QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); mirror_throttle(s); @@ -646,9 +644,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) continue; } - mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, false); - sector_num += nb_sectors; + mirror_do_zero_or_discard(s, offset, bytes, false); + offset += bytes; } mirror_wait_for_all_io(s); @@ -656,10 +653,10 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) } /* First part, loop on the sectors and initialize the dirty bitmap. */ - for (sector_num = 0; sector_num < end; ) { + for (offset = 0; offset < s->bdev_length; ) { /* Just to make sure we are not exceeding int limit. */ - int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, - end - sector_num); + int bytes = MIN(s->bdev_length - offset, + QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); mirror_throttle(s); @@ -667,23 +664,16 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) return 0; } - ret = bdrv_is_allocated_above(bs, base, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, &count); + ret = bdrv_is_allocated_above(bs, base, offset, bytes, &count); if (ret < 0) { return ret; } - /* TODO: Relax this once bdrv_is_allocated_above and dirty - * bitmaps no longer require sector alignment. */ - assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); - n = count >> BDRV_SECTOR_BITS; - assert(n > 0); + assert(count); if (ret == 1) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, - sector_num * BDRV_SECTOR_SIZE, - n * BDRV_SECTOR_SIZE); + bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count); } - sector_num += n; + offset += count; } return 0; } From b85ee45334585c49c5489a303336df119408763a Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:22 -0500 Subject: [PATCH 17/54] qcow2: Switch qcow2_measure() to byte-based iteration This is new code, but it is easier to read if it makes passes over the image using bytes rather than sectors (and will get easier in the future when bdrv_get_block_status is converted to byte-based). Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 970006fc1d..b8da8ca105 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3673,20 +3673,19 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, */ required = virtual_size; } else { - int cluster_sectors = cluster_size / BDRV_SECTOR_SIZE; - int64_t sector_num; + int64_t offset; int pnum = 0; - for (sector_num = 0; - sector_num < ssize / BDRV_SECTOR_SIZE; - sector_num += pnum) { - int nb_sectors = MIN(ssize / BDRV_SECTOR_SIZE - sector_num, - BDRV_REQUEST_MAX_SECTORS); + for (offset = 0; offset < ssize; + offset += pnum * BDRV_SECTOR_SIZE) { + int nb_sectors = MIN(ssize - offset, + BDRV_REQUEST_MAX_BYTES) / BDRV_SECTOR_SIZE; BlockDriverState *file; int64_t ret; ret = bdrv_get_block_status_above(in_bs, NULL, - sector_num, nb_sectors, + offset >> BDRV_SECTOR_BITS, + nb_sectors, &pnum, &file); if (ret < 0) { error_setg_errno(&local_err, -ret, @@ -3699,12 +3698,11 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { /* Extend pnum to end of cluster for next iteration */ - pnum = ROUND_UP(sector_num + pnum, cluster_sectors) - - sector_num; + pnum = (ROUND_UP(offset + pnum * BDRV_SECTOR_SIZE, + cluster_size) - offset) >> BDRV_SECTOR_BITS; /* Count clusters we've seen */ - required += (sector_num % cluster_sectors + pnum) * - BDRV_SECTOR_SIZE; + required += offset % cluster_size + pnum * BDRV_SECTOR_SIZE; } } } From ab94db6f7609e562b5f68cc71807d5d50e24e224 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:23 -0500 Subject: [PATCH 18/54] qcow2: Switch load_bitmap_data() to byte-based iteration Now that we have adjusted the majority of the calls this function makes to be byte-based, it is easier to read the code if it makes passes over the image using bytes rather than sectors. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2-bitmap.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 2d8dcba3e8..02512a21f2 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -291,9 +291,8 @@ static int load_bitmap_data(BlockDriverState *bs, { int ret = 0; BDRVQcow2State *s = bs->opaque; - uint64_t sector, limit, sbc; + uint64_t offset, limit; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); - uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); uint8_t *buf = NULL; uint64_t i, tab_size = size_to_clusters(s, @@ -305,32 +304,27 @@ static int load_bitmap_data(BlockDriverState *bs, buf = g_malloc(s->cluster_size); limit = bytes_covered_by_bitmap_cluster(s, bitmap); - sbc = limit >> BDRV_SECTOR_BITS; - for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) { - uint64_t count = MIN(bm_sectors - sector, sbc); + for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) { + uint64_t count = MIN(bm_size - offset, limit); uint64_t entry = bitmap_table[i]; - uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; + uint64_t data_offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; assert(check_table_entry(entry, s->cluster_size) == 0); - if (offset == 0) { + if (data_offset == 0) { if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) { - bdrv_dirty_bitmap_deserialize_ones(bitmap, - sector * BDRV_SECTOR_SIZE, - count * BDRV_SECTOR_SIZE, + bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count, false); } else { /* No need to deserialize zeros because the dirty bitmap is * already cleared */ } } else { - ret = bdrv_pread(bs->file, offset, buf, s->cluster_size); + ret = bdrv_pread(bs->file, data_offset, buf, s->cluster_size); if (ret < 0) { goto finish; } - bdrv_dirty_bitmap_deserialize_part(bitmap, buf, - sector * BDRV_SECTOR_SIZE, - count * BDRV_SECTOR_SIZE, + bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count, false); } } From 49d741b5041b79214db58f364cebe2f367517711 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:24 -0500 Subject: [PATCH 19/54] qcow2: Switch store_bitmap_data() to byte-based iteration Now that we have adjusted the majority of the calls this function makes to be byte-based, it is easier to read the code if it makes passes over the image using bytes rather than sectors. iotests 165 was rather weak - on a default 64k-cluster image, where bitmap granularity also defaults to 64k bytes, a single cluster of the bitmap table thus covers (64*1024*8) bits which each cover 64k bytes, or 32G of image space. But the test only uses a 1G image, so it cannot trigger any more than one loop of the code in store_bitmap_data(); and it was writing to the first cluster. In order to test that we are properly aligning which portions of the bitmap are being written to the file, we really want to test a case where the first dirty bit returned by bdrv_dirty_iter_next() is not aligned to the start of a cluster, which we can do by modifying the test to write data that doesn't happen to fall in the first cluster of the image. Signed-off-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: John Snow Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2-bitmap.c | 31 ++++++++++++++++--------------- tests/qemu-iotests/165 | 2 +- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 02512a21f2..f45e46cfbd 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -1072,10 +1072,9 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, { int ret; BDRVQcow2State *s = bs->opaque; - int64_t sector; - uint64_t limit, sbc; + int64_t offset; + uint64_t limit; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); - uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); const char *bm_name = bdrv_dirty_bitmap_name(bitmap); uint8_t *buf = NULL; BdrvDirtyBitmapIter *dbi; @@ -1100,18 +1099,22 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, dbi = bdrv_dirty_iter_new(bitmap); buf = g_malloc(s->cluster_size); limit = bytes_covered_by_bitmap_cluster(s, bitmap); - sbc = limit >> BDRV_SECTOR_BITS; assert(DIV_ROUND_UP(bm_size, limit) == tb_size); - while ((sector = bdrv_dirty_iter_next(dbi) >> BDRV_SECTOR_BITS) >= 0) { - uint64_t cluster = sector / sbc; + while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) { + uint64_t cluster = offset / limit; uint64_t end, write_size; int64_t off; - sector = cluster * sbc; - end = MIN(bm_sectors, sector + sbc); - write_size = bdrv_dirty_bitmap_serialization_size(bitmap, - sector * BDRV_SECTOR_SIZE, (end - sector) * BDRV_SECTOR_SIZE); + /* + * We found the first dirty offset, but want to write out the + * entire cluster of the bitmap that includes that offset, + * including any leading zero bits. + */ + offset = QEMU_ALIGN_DOWN(offset, limit); + end = MIN(bm_size, offset + limit); + write_size = bdrv_dirty_bitmap_serialization_size(bitmap, offset, + end - offset); assert(write_size <= s->cluster_size); off = qcow2_alloc_clusters(bs, s->cluster_size); @@ -1123,9 +1126,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, } tb[cluster] = off; - bdrv_dirty_bitmap_serialize_part(bitmap, buf, - sector * BDRV_SECTOR_SIZE, - (end - sector) * BDRV_SECTOR_SIZE); + bdrv_dirty_bitmap_serialize_part(bitmap, buf, offset, end - offset); if (write_size < s->cluster_size) { memset(buf + write_size, 0, s->cluster_size - write_size); } @@ -1143,11 +1144,11 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, goto fail; } - if (end >= bm_sectors) { + if (end >= bm_size) { break; } - bdrv_set_dirty_iter(dbi, end * BDRV_SECTOR_SIZE); + bdrv_set_dirty_iter(dbi, end); } *bitmap_table_size = tb_size; diff --git a/tests/qemu-iotests/165 b/tests/qemu-iotests/165 index 74d7b79a0b..a3932db3de 100755 --- a/tests/qemu-iotests/165 +++ b/tests/qemu-iotests/165 @@ -27,7 +27,7 @@ disk = os.path.join(iotests.test_dir, 'disk') disk_size = 0x40000000 # 1G # regions for qemu_io: (start, count) in bytes -regions1 = ((0, 0x100000), +regions1 = ((0x0fff00, 0x10000), (0x200000, 0x100000)) regions2 = ((0x10000000, 0x20000), From 0fdf1a4f68e5f27df9bb10a7223bd519c8d30e31 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:25 -0500 Subject: [PATCH 20/54] dirty-bitmap: Switch bdrv_set_dirty() to bytes Both callers already had bytes available, but were scaling to sectors. Move the scaling to internal code. In the case of bdrv_aligned_pwritev(), we are now passing the exact offset rather than a rounded sector-aligned value, but that's okay as long as dirty bitmap widens start/bytes to granularity boundaries. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 7 ++++--- block/io.c | 6 ++---- include/block/block_int.h | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 117837b3cc..58a3f330a9 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -628,10 +628,10 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) hbitmap_deserialize_finish(bitmap->bitmap); } -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int64_t nr_sectors) +void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) { BdrvDirtyBitmap *bitmap; + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); if (QLIST_EMPTY(&bs->dirty_bitmaps)) { return; @@ -643,7 +643,8 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, continue; } assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, + end_sector - (offset >> BDRV_SECTOR_BITS)); } bdrv_dirty_bitmaps_unlock(bs); } diff --git a/block/io.c b/block/io.c index d633b0f851..e0f904583f 100644 --- a/block/io.c +++ b/block/io.c @@ -1334,7 +1334,6 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, bool waited; int ret; - int64_t start_sector = offset >> BDRV_SECTOR_BITS; int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); uint64_t bytes_remaining = bytes; int max_transfer; @@ -1409,7 +1408,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); atomic_inc(&bs->write_gen); - bdrv_set_dirty(bs, start_sector, end_sector - start_sector); + bdrv_set_dirty(bs, offset, bytes); stat64_max(&bs->wr_highest_offset, offset + bytes); @@ -2438,8 +2437,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, ret = 0; out: atomic_inc(&bs->write_gen); - bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS, - req.bytes >> BDRV_SECTOR_BITS); + bdrv_set_dirty(bs, req.offset, req.bytes); tracked_request_end(&req); bdrv_dec_in_flight(bs); return ret; diff --git a/include/block/block_int.h b/include/block/block_int.h index 99abe2ce74..79366b94b5 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -1028,7 +1028,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force); bool blk_dev_is_tray_open(BlockBackend *blk); bool blk_dev_is_medium_locked(BlockBackend *blk); -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sect); +void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); bool bdrv_requests_pending(BlockDriverState *bs); void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); From ca759622441da1d19107efaa62d9d63fb83ebb79 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:26 -0500 Subject: [PATCH 21/54] dirty-bitmap: Convert internal hbitmap size/granularity Now that all callers are using byte-based interfaces, there's no reason for our internal hbitmap to remain with sector-based granularity. It also simplifies our internal scaling, since we already know that hbitmap widens requests out to granularity boundaries. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 62 +++++++++++++------------------------------- 1 file changed, 18 insertions(+), 44 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 58a3f330a9..bd04e991b1 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -38,7 +38,7 @@ */ struct BdrvDirtyBitmap { QemuMutex *mutex; - HBitmap *bitmap; /* Dirty sector bitmap implementation */ + HBitmap *bitmap; /* Dirty bitmap implementation */ HBitmap *meta; /* Meta dirty bitmap */ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ char *name; /* Optional non-empty unique ID */ @@ -130,12 +130,7 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, } bitmap = g_new0(BdrvDirtyBitmap, 1); bitmap->mutex = &bs->dirty_bitmap_mutex; - /* - * TODO - let hbitmap track full granularity. For now, it is tracking - * only sector granularity, as a shortcut for our iterators. - */ - bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap_size, BDRV_SECTOR_SIZE), - ctz32(granularity) - BDRV_SECTOR_BITS); + bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(granularity)); bitmap->size = bitmap_size; bitmap->name = g_strdup(name); bitmap->disabled = false; @@ -312,7 +307,7 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes) QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); assert(!bitmap->active_iterators); - hbitmap_truncate(bitmap->bitmap, DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE)); + hbitmap_truncate(bitmap->bitmap, bytes); bitmap->size = bytes; } bdrv_dirty_bitmaps_unlock(bs); @@ -442,7 +437,7 @@ bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t offset) { if (bitmap) { - return hbitmap_get(bitmap->bitmap, offset >> BDRV_SECTOR_BITS); + return hbitmap_get(bitmap->bitmap, offset); } else { return false; } @@ -470,7 +465,7 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap) { - return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); + return 1U << hbitmap_granularity(bitmap->bitmap); } BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap) @@ -503,20 +498,16 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) { - int64_t ret = hbitmap_iter_next(&iter->hbi); - return ret < 0 ? -1 : ret * BDRV_SECTOR_SIZE; + return hbitmap_iter_next(&iter->hbi); } /* Called within bdrv_dirty_bitmap_lock..unlock */ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t offset, int64_t bytes) { - int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); - assert(bdrv_dirty_bitmap_enabled(bitmap)); assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, - end_sector - (offset >> BDRV_SECTOR_BITS)); + hbitmap_set(bitmap->bitmap, offset, bytes); } void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, @@ -531,12 +522,9 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t offset, int64_t bytes) { - int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); - assert(bdrv_dirty_bitmap_enabled(bitmap)); assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_reset(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, - end_sector - (offset >> BDRV_SECTOR_BITS)); + hbitmap_reset(bitmap->bitmap, offset, bytes); } void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, @@ -556,8 +544,7 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) hbitmap_reset_all(bitmap->bitmap); } else { HBitmap *backup = bitmap->bitmap; - bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap->size, - BDRV_SECTOR_SIZE), + bitmap->bitmap = hbitmap_alloc(bitmap->size, hbitmap_granularity(backup)); *out = backup; } @@ -576,51 +563,40 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, uint64_t offset, uint64_t bytes) { - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - return hbitmap_serialization_size(bitmap->bitmap, - offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS); + return hbitmap_serialization_size(bitmap->bitmap, offset, bytes); } uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) { - return hbitmap_serialization_align(bitmap->bitmap) * BDRV_SECTOR_SIZE; + return hbitmap_serialization_align(bitmap->bitmap); } void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, uint8_t *buf, uint64_t offset, uint64_t bytes) { - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - hbitmap_serialize_part(bitmap->bitmap, buf, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS); + hbitmap_serialize_part(bitmap->bitmap, buf, offset, bytes); } void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, uint8_t *buf, uint64_t offset, uint64_t bytes, bool finish) { - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - hbitmap_deserialize_part(bitmap->bitmap, buf, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, finish); + hbitmap_deserialize_part(bitmap->bitmap, buf, offset, bytes, finish); } void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, uint64_t offset, uint64_t bytes, bool finish) { - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - hbitmap_deserialize_zeroes(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, finish); + hbitmap_deserialize_zeroes(bitmap->bitmap, offset, bytes, finish); } void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, uint64_t offset, uint64_t bytes, bool finish) { - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - hbitmap_deserialize_ones(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, finish); + hbitmap_deserialize_ones(bitmap->bitmap, offset, bytes, finish); } void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) @@ -631,7 +607,6 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) { BdrvDirtyBitmap *bitmap; - int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); if (QLIST_EMPTY(&bs->dirty_bitmaps)) { return; @@ -643,8 +618,7 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) continue; } assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, - end_sector - (offset >> BDRV_SECTOR_BITS)); + hbitmap_set(bitmap->bitmap, offset, bytes); } bdrv_dirty_bitmaps_unlock(bs); } @@ -654,12 +628,12 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) */ void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset) { - hbitmap_iter_init(&iter->hbi, iter->hbi.hb, offset >> BDRV_SECTOR_BITS); + hbitmap_iter_init(&iter->hbi, iter->hbi.hb, offset); } int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) { - return hbitmap_count(bitmap->bitmap) << BDRV_SECTOR_BITS; + return hbitmap_count(bitmap->bitmap); } int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap) From dbfa934106d22402d809d039e773b50ab1885477 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 3 Oct 2017 11:57:24 +0200 Subject: [PATCH 22/54] hw/block/onenand: Remove dead code block The condition of the for-loop makes sure that b is always smaller than s->blocks, so the "if (b >= s->blocks)" statement is completely superfluous here. Buglink: https://bugs.launchpad.net/qemu/+bug/1715007 Signed-off-by: Thomas Huth Reviewed-by: Laurent Vivier Signed-off-by: Kevin Wolf --- hw/block/onenand.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/block/onenand.c b/hw/block/onenand.c index 30e40f3914..de65c9ebb9 100644 --- a/hw/block/onenand.c +++ b/hw/block/onenand.c @@ -520,10 +520,6 @@ static void onenand_command(OneNANDState *s) s->intstatus |= ONEN_INT; for (b = 0; b < s->blocks; b ++) { - if (b >= s->blocks) { - s->status |= ONEN_ERR_CMD; - break; - } if (s->blockwp[b] == ONEN_LOCK_LOCKTIGHTEN) break; From f06a8dcfc6d51cdcd51f4320b895680cbbc872a9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Sep 2017 16:44:50 +0200 Subject: [PATCH 23/54] qemu-iotests: remove dead code This includes shell function, shell variables and command line options (randomize.awk does not exist). Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/check | 28 ------------- tests/qemu-iotests/common | 23 ----------- tests/qemu-iotests/common.config | 26 ------------ tests/qemu-iotests/common.rc | 68 -------------------------------- 4 files changed, 145 deletions(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 4583a0c269..041780b09f 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -65,7 +65,6 @@ then export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper" fi -# if ./qemu exists, it should be prioritized and will be chosen by common.config if [[ -z "$QEMU_PROG" && ! -x './qemu' ]] then arch=$(uname -m 2> /dev/null) @@ -140,12 +139,6 @@ _timestamp() _wrapup() { - # for hangcheck ... - # remove files that were used by hangcheck - # - [ -f "${TEST_DIR}"/check.pid ] && rm -rf "${TEST_DIR}"/check.pid - [ -f "${TEST_DIR}"/check.sts ] && rm -rf "${TEST_DIR}"/check.sts - if $showme then : @@ -201,24 +194,6 @@ END { if (NR > 0) { trap "_wrapup; exit \$status" 0 1 2 3 15 -# for hangcheck ... -# Save pid of check in a well known place, so that hangcheck can be sure it -# has the right pid (getting the pid from ps output is not reliable enough). -# -rm -rf "${TEST_DIR}"/check.pid -echo $$ > "${TEST_DIR}"/check.pid - -# for hangcheck ... -# Save the status of check in a well known place, so that hangcheck can be -# sure to know where check is up to (getting test number from ps output is -# not reliable enough since the trace stuff has been introduced). -# -rm -rf "${TEST_DIR}"/check.sts -echo "preamble" > "${TEST_DIR}"/check.sts - -# don't leave old full output behind on a clean run -rm -f check.full - [ -f $TIMESTAMP_FILE ] || touch $TIMESTAMP_FILE FULL_IMGFMT_DETAILS=`_full_imgfmt_details` @@ -276,9 +251,6 @@ do fi rm -f core $seq.notrun - # for hangcheck ... - echo "$seq" > "${TEST_DIR}"/check.sts - start=`_wallclock` $timestamp && printf %s " [$(date "+%T")]" diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index d34c11c056..867918895b 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -19,17 +19,6 @@ # common procedures for QA scripts # -_setenvironment() -{ - MSGVERB="text:action" - export MSGVERB -} - -rm -f "$OUTPUT_DIR/$iam.out" -_setenvironment - -check=${check-true} - diff="diff -u" verbose=false debug=false @@ -40,7 +29,6 @@ showme=false sortme=false expunge=true have_test_arg=false -randomize=false cachemode=false rm -f $tmp.list $tmp.tmp $tmp.sed @@ -170,7 +158,6 @@ other options -n show me, do not run tests -o options -o options to pass to qemu-img create/convert -T output timestamps - -r randomize test order -c mode cache mode testlist options @@ -327,11 +314,6 @@ testlist options cachemode=true xpand=false ;; - -r) # randomize test order - randomize=true - xpand=false - ;; - -T) # turn on timestamp output timestamp=true xpand=false @@ -445,11 +427,6 @@ fi list=`sort $tmp.list` rm -f $tmp.list $tmp.tmp $tmp.sed -if $randomize -then - list=`echo $list | awk -f randomize.awk` -fi - [ "$QEMU" = "" ] && _fatal "qemu not found" [ "$QEMU_IMG" = "" ] && _fatal "qemu-img not found" [ "$QEMU_IO" = "" ] && _fatal "qemu-io not found" diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index e0883a0c65..b599c72211 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -15,33 +15,14 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . # -# -# setup and check for config parameters, and in particular -# -# EMAIL - email of the script runner. -# TEST_DIR - scratch test directory -# -# - These can be added to $HOST_CONFIG_DIR (witch default to ./config) -# below or a separate local configuration file can be used (using -# the HOST_OPTIONS variable). -# - This script is shared by the stress test system and the auto-qa -# system (includes both regression test and benchmark components). -# - this script shouldn't make any assertions about filesystem -# validity or mountedness. -# - # all tests should use a common language setting to prevent golden # output mismatches. export LANG=C PATH=".:$PATH" -HOST=`hostname -s 2> /dev/null` HOSTOS=`uname -s` -EMAIL=root@localhost # where auto-qa will send its status messages -export HOST_OPTIONS=${HOST_OPTIONS:=local.config} -export CHECK_OPTIONS=${CHECK_OPTIONS:="-g auto"} export PWD=`pwd` export _QEMU_HANDLE=0 @@ -78,11 +59,6 @@ _fatal() export AWK_PROG="`set_prog_path awk`" [ "$AWK_PROG" = "" ] && _fatal "awk not found" -export SED_PROG="`set_prog_path sed`" -[ "$SED_PROG" = "" ] && _fatal "sed not found" - -export PS_ALL_FLAGS="-ef" - if [ -z "$QEMU_PROG" ]; then export QEMU_PROG="`set_prog_path qemu`" fi @@ -198,8 +174,6 @@ fi export QEMU_DEFAULT_MACHINE="$default_machine" -[ -f /etc/qemu-iotest.config ] && . /etc/qemu-iotest.config - if [ -z "$TEST_DIR" ]; then TEST_DIR=`pwd`/scratch fi diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 8d486dbeb4..5938d5145f 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -293,51 +293,6 @@ _img_info() done } -_get_pids_by_name() -{ - if [ $# -ne 1 ] - then - echo "Usage: _get_pids_by_name process-name" 1>&2 - exit 1 - fi - - # Algorithm ... all ps(1) variants have a time of the form MM:SS or - # HH:MM:SS before the psargs field, use this as the search anchor. - # - # Matches with $1 (process-name) occur if the first psarg is $1 - # or ends in /$1 ... the matching uses sed's regular expressions, - # so passing a regex into $1 will work. - - ps $PS_ALL_FLAGS \ - | sed -n \ - -e 's/$/ /' \ - -e 's/[ ][ ]*/ /g' \ - -e 's/^ //' \ - -e 's/^[^ ]* //' \ - -e "/[0-9]:[0-9][0-9] *[^ ]*\/$1 /s/ .*//p" \ - -e "/[0-9]:[0-9][0-9] *$1 /s/ .*//p" -} - -# fqdn for localhost -# -_get_fqdn() -{ - host=`hostname` - $NSLOOKUP_PROG $host | $AWK_PROG '{ if ($1 == "Name:") print $2 }' -} - -# check if run as root -# -_need_to_be_root() -{ - id=`id | $SED_PROG -e 's/(.*//' -e 's/.*=//'` - if [ "$id" -ne 0 ] - then - echo "Arrgh ... you need to be root (not uid=$id) to run this test" - exit 1 - fi -} - # bail out, setting up .notrun file # _notrun() @@ -491,28 +446,5 @@ _full_platform_details() echo "$os/$platform $host $kernel" } -_link_out_file() -{ - if [ -z "$1" ]; then - echo Error must pass \$seq. - exit - fi - rm -f $1 - if [ "`uname`" == "IRIX64" ] || [ "`uname`" == "IRIX" ]; then - ln -s $1.irix $1 - elif [ "`uname`" == "Linux" ]; then - ln -s $1.linux $1 - else - echo Error test $seq does not run on the operating system: `uname` - exit - fi -} - -_die() -{ - echo $@ - exit 1 -} - # make sure this script returns success true From 9ee4b6f803335d23e3b6fe21c139707918c3eb24 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Sep 2017 16:44:51 +0200 Subject: [PATCH 24/54] qemu-iotests: get rid of AWK_PROG Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/check | 4 ++-- tests/qemu-iotests/common | 2 +- tests/qemu-iotests/common.config | 3 --- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 041780b09f..67da67bd54 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -128,7 +128,7 @@ tmp="${TEST_DIR}"/$$ _wallclock() { - date "+%H %M %S" | $AWK_PROG '{ print $1*3600 + $2*60 + $3 }' + date "+%H %M %S" | awk '{ print $1*3600 + $2*60 + $3 }' } _timestamp() @@ -147,7 +147,7 @@ _wrapup() if [ -f $TIMESTAMP_FILE -a -f $tmp.time ] then cat $TIMESTAMP_FILE $tmp.time \ - | $AWK_PROG ' + | awk ' { t[$1] = $2 } END { if (NR > 0) { for (i in t) print i " " t[i] diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 867918895b..130f647a4d 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -366,7 +366,7 @@ testlist options if $xpand then have_test_arg=true - $AWK_PROG Date: Tue, 12 Sep 2017 16:44:52 +0200 Subject: [PATCH 25/54] qemu-iotests: move "check" code out of common.rc Some functions in common.rc are never used by the tests. Move them out of that file and into common, which is already included only by "check". Code that actually *is* common to "check" and tests can be placed in common.config. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/common | 25 +++++++++++++++++++- tests/qemu-iotests/common.config | 12 ++++++++++ tests/qemu-iotests/common.rc | 40 -------------------------------- 3 files changed, 36 insertions(+), 41 deletions(-) diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 130f647a4d..2e98e64d5c 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -19,6 +19,24 @@ # common procedures for QA scripts # +_full_imgfmt_details() +{ + if [ -n "$IMGOPTS" ]; then + echo "$IMGFMT ($IMGOPTS)" + else + echo "$IMGFMT" + fi +} + +_full_platform_details() +{ + os=`uname -s` + host=`hostname -s` + kernel=`uname -r` + platform=`uname -m` + echo "$os/$platform $host $kernel" +} + diff="diff -u" verbose=false debug=false @@ -404,7 +422,12 @@ if [ "$IMGOPTSSYNTAX" != "true" ]; then fi # Set default options for qemu-img create -o if they were not specified -_set_default_imgopts +if [ "$IMGFMT" == "qcow2" ] && ! (echo "$IMGOPTS" | grep "compat=" > /dev/null); then + IMGOPTS=$(_optstr_add "$IMGOPTS" "compat=1.1") +fi +if [ "$IMGFMT" == "luks" ] && ! (echo "$IMGOPTS" | grep "iter-time=" > /dev/null); then + IMGOPTS=$(_optstr_add "$IMGOPTS" "iter-time=10") +fi if [ -s $tmp.list ] then diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index 0f571d46eb..91da65f3dc 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -27,6 +27,9 @@ export PWD=`pwd` export _QEMU_HANDLE=0 +# make sure we have a standard umask +umask 022 + # $1 = prog to look for, $2* = default pathnames if not found in $PATH set_prog_path() { @@ -49,6 +52,15 @@ set_prog_path() return 1 } +_optstr_add() +{ + if [ -n "$1" ]; then + echo "$1,$2" + else + echo "$2" + fi +} + _fatal() { echo "$*" diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 5938d5145f..6f6e03366f 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -50,9 +50,6 @@ then fi fi -# make sure we have a standard umask -umask 022 - if [ "$IMGOPTSSYNTAX" = "true" ]; then DRIVER="driver=$IMGFMT" if [ "$IMGFMT" = "luks" ]; then @@ -94,25 +91,6 @@ else fi ORIG_TEST_IMG="$TEST_IMG" -_optstr_add() -{ - if [ -n "$1" ]; then - echo "$1,$2" - else - echo "$2" - fi -} - -_set_default_imgopts() -{ - if [ "$IMGFMT" == "qcow2" ] && ! (echo "$IMGOPTS" | grep "compat=" > /dev/null); then - IMGOPTS=$(_optstr_add "$IMGOPTS" "compat=1.1") - fi - if [ "$IMGFMT" == "luks" ] && ! (echo "$IMGOPTS" | grep "iter-time=" > /dev/null); then - IMGOPTS=$(_optstr_add "$IMGOPTS" "iter-time=10") - fi -} - _use_sample_img() { SAMPLE_IMG_FILE="${1%\.bz2}" @@ -428,23 +406,5 @@ _require_command() [ -x "$c" ] || _notrun "$1 utility required, skipped this test" } -_full_imgfmt_details() -{ - if [ -n "$IMGOPTS" ]; then - echo "$IMGFMT ($IMGOPTS)" - else - echo "$IMGFMT" - fi -} - -_full_platform_details() -{ - os=`uname -s` - host=`hostname -s` - kernel=`uname -r` - platform=`uname -m` - echo "$os/$platform $host $kernel" -} - # make sure this script returns success true From cceaf1db6fbede3ed0bca103d12f8a19c1374e42 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Sep 2017 16:44:53 +0200 Subject: [PATCH 26/54] qemu-iotests: cleanup and fix search for programs Instead of ./check failing when a binary is missing, we try each test case now and each one fails with tons of test case diffs. Also, all the variables were initialized by "check" prior to "common" being sourced, and then (uselessly) checked for emptiness again in "check". Centralize the search for programs in "common" (which will soon be one with "check"), including the "realpath" invocation which can be done just once in "check" rather than in the tests. For qnio_server, move the detection to "common", simplifying set_prog_path to stop handling the unused second argument, and embedding the "realpath" pass. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/check | 41 ----------------- tests/qemu-iotests/common | 79 +++++++++++++++++++++++++++++--- tests/qemu-iotests/common.config | 61 +----------------------- 3 files changed, 74 insertions(+), 107 deletions(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 67da67bd54..03871d0681 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -60,47 +60,6 @@ fi build_root="$build_iotests/../.." -if [ -x "$build_iotests/socket_scm_helper" ] -then - export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper" -fi - -if [[ -z "$QEMU_PROG" && ! -x './qemu' ]] -then - arch=$(uname -m 2> /dev/null) - - if [[ -n $arch && -x "$build_root/$arch-softmmu/qemu-system-$arch" ]] - then - export QEMU_PROG="$build_root/$arch-softmmu/qemu-system-$arch" - else - pushd "$build_root" > /dev/null - for binary in *-softmmu/qemu-system-* - do - if [ -x "$binary" ] - then - export QEMU_PROG="$build_root/$binary" - break - fi - done - popd > /dev/null - fi -fi - -if [[ -z $QEMU_IMG_PROG && -x "$build_root/qemu-img" && ! -x './qemu-img' ]] -then - export QEMU_IMG_PROG="$build_root/qemu-img" -fi - -if [[ -z $QEMU_IO_PROG && -x "$build_root/qemu-io" && ! -x './qemu-io' ]] -then - export QEMU_IO_PROG="$build_root/qemu-io" -fi - -if [[ -z $QEMU_NBD_PROG && -x "$build_root/qemu-nbd" && ! -x './qemu-nbd' ]] -then - export QEMU_NBD_PROG="$build_root/qemu-nbd" -fi - # we need common.env if ! . "$build_iotests/common.env" then diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 2e98e64d5c..abacc24114 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -37,6 +37,17 @@ _full_platform_details() echo "$os/$platform $host $kernel" } +# $1 = prog to look for +set_prog_path() +{ + p=`command -v $1 2> /dev/null` + if [ -n "$p" -a -x "$p" ]; then + realpath -- "$(type -p "$p")" + else + return 1 + fi +} + diff="diff -u" verbose=false debug=false @@ -450,10 +461,66 @@ fi list=`sort $tmp.list` rm -f $tmp.list $tmp.tmp $tmp.sed -[ "$QEMU" = "" ] && _fatal "qemu not found" -[ "$QEMU_IMG" = "" ] && _fatal "qemu-img not found" -[ "$QEMU_IO" = "" ] && _fatal "qemu-io not found" - -if [ "$IMGPROTO" = "nbd" ] ; then - [ "$QEMU_NBD" = "" ] && _fatal "qemu-nbd not found" +if [ -z "$QEMU_PROG" ] +then + if [ -x "$build_iotests/qemu" ]; then + export QEMU_PROG="$build_iotests/qemu" + elif [ -x "$build_root/$arch-softmmu/qemu-system-$arch" ]; then + export QEMU_PROG="$build_root/$arch-softmmu/qemu-system-$arch" + else + pushd "$build_root" > /dev/null + for binary in *-softmmu/qemu-system-* + do + if [ -x "$binary" ] + then + export QEMU_PROG="$build_root/$binary" + break + fi + done + popd > /dev/null + [ "$QEMU_PROG" = "" ] && _init_error "qemu not found" + fi +fi +export QEMU_PROG=$(realpath -- "$(type -p "$QEMU_PROG")") + +if [ -z "$QEMU_IMG_PROG" ]; then + if [ -x "$build_iotests/qemu-img" ]; then + export QEMU_IMG_PROG="$build_iotests/qemu-img" + elif [ -x "$build_root/qemu-img" ]; then + export QEMU_IMG_PROG="$build_root/qemu-img" + else + _init_error "qemu-img not found" + fi +fi +export QEMU_IMG_PROG=$(realpath -- "$(type -p "$QEMU_IMG_PROG")") + +if [ -z "$QEMU_IO_PROG" ]; then + if [ -x "$build_iotests/qemu-io" ]; then + export QEMU_IO_PROG="$build_iotests/qemu-io" + elif [ -x "$build_root/qemu-io" ]; then + export QEMU_IO_PROG="$build_root/qemu-io" + else + _init_error "qemu-io not found" + fi +fi +export QEMU_IO_PROG=$(realpath -- "$(type -p "$QEMU_IO_PROG")") + +if [ -z $QEMU_NBD_PROG ]; then + if [ -x "$build_iotests/qemu-nbd" ]; then + export QEMU_NBD_PROG="$build_iotests/qemu-nbd" + elif [ -x "$build_root/qemu-nbd" ]; then + export QEMU_NBD_PROG="$build_root/qemu-nbd" + else + _init_error "qemu-nbd not found" + fi +fi +export QEMU_NBD_PROG=$(realpath -- "$(type -p "$QEMU_NBD_PROG")") + +if [ -z "$QEMU_VXHS_PROG" ]; then + export QEMU_VXHS_PROG="`set_prog_path qnio_server`" +fi + +if [ -x "$build_iotests/socket_scm_helper" ] +then + export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper" fi diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index 91da65f3dc..c97c63983f 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -22,6 +22,7 @@ export LANG=C PATH=".:$PATH" HOSTOS=`uname -s` +arch=`uname -m` export PWD=`pwd` @@ -30,28 +31,6 @@ export _QEMU_HANDLE=0 # make sure we have a standard umask umask 022 -# $1 = prog to look for, $2* = default pathnames if not found in $PATH -set_prog_path() -{ - p=`command -v $1 2> /dev/null` - if [ -n "$p" -a -x "$p" ]; then - echo $p - return 0 - fi - p=$1 - - shift - for f; do - if [ -x $f ]; then - echo $f - return 0 - fi - done - - echo "" - return 1 -} - _optstr_add() { if [ -n "$1" ]; then @@ -61,44 +40,6 @@ _optstr_add() fi } -_fatal() -{ - echo "$*" - status=1 - exit 1 -} - -if [ -z "$QEMU_PROG" ]; then - export QEMU_PROG="`set_prog_path qemu`" -fi - -if [ -z "$QEMU_IMG_PROG" ]; then - export QEMU_IMG_PROG="`set_prog_path qemu-img`" -fi - -if [ -z "$QEMU_IO_PROG" ]; then - export QEMU_IO_PROG="`set_prog_path qemu-io`" -fi - -if [ -z "$QEMU_NBD_PROG" ]; then - export QEMU_NBD_PROG="`set_prog_path qemu-nbd`" -fi - -if [ -z "$QEMU_VXHS_PROG" ]; then - export QEMU_VXHS_PROG="`set_prog_path qnio_server`" -fi - -export QEMU_PROG=$(realpath -- "$(type -p "$QEMU_PROG")") -export QEMU_IMG_PROG=$(realpath -- "$(type -p "$QEMU_IMG_PROG")") -export QEMU_IO_PROG=$(realpath -- "$(type -p "$QEMU_IO_PROG")") -export QEMU_NBD_PROG=$(realpath -- "$(type -p "$QEMU_NBD_PROG")") - -# This program is not built as part of qemu but (possibly) provided by the -# system, so it may not be present at all -if [ -n "$QEMU_VXHS_PROG" ]; then - export QEMU_VXHS_PROG=$(realpath -- "$(type -p "$QEMU_VXHS_PROG")") -fi - _qemu_wrapper() { ( From d1f2447a3e8d0804838e2445f955996c925395ef Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Sep 2017 16:44:54 +0200 Subject: [PATCH 27/54] qemu-iotests: limit non-_PROG-suffixed variables to common.rc These are never used by "check", with one exception that does not need $QEMU_OPTIONS. Keep them in common.rc, which will be soon included only by the tests. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/039.out | 10 ++--- tests/qemu-iotests/061.out | 4 +- tests/qemu-iotests/137.out | 2 +- tests/qemu-iotests/common.config | 69 +------------------------------- tests/qemu-iotests/common.rc | 65 ++++++++++++++++++++++++++++++ 5 files changed, 75 insertions(+), 75 deletions(-) diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out index c6e0ac2da3..724d7b2508 100644 --- a/tests/qemu-iotests/039.out +++ b/tests/qemu-iotests/039.out @@ -11,7 +11,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then +./common.rc: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; else exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; @@ -50,7 +50,7 @@ read 512/512 bytes at offset 0 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then +./common.rc: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; else exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; @@ -68,7 +68,7 @@ incompatible_features 0x0 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then +./common.rc: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; else exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; @@ -91,7 +91,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then +./common.rc: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; else exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; @@ -105,7 +105,7 @@ Data may be corrupted, or further writes to the image may corrupt it. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then +./common.rc: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; else exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out index a431b7f305..942485de99 100644 --- a/tests/qemu-iotests/061.out +++ b/tests/qemu-iotests/061.out @@ -57,7 +57,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then +./common.rc: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; else exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; @@ -219,7 +219,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then +./common.rc: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; else exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out index c0e753483b..05efd74d17 100644 --- a/tests/qemu-iotests/137.out +++ b/tests/qemu-iotests/137.out @@ -31,7 +31,7 @@ Cache clean interval too big Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then +./common.rc: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; else exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@"; diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index c97c63983f..c7f0a7a7e0 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -40,71 +40,6 @@ _optstr_add() fi } -_qemu_wrapper() -{ - ( - if [ -n "${QEMU_NEED_PID}" ]; then - echo $BASHPID > "${QEMU_TEST_DIR}/qemu-${_QEMU_HANDLE}.pid" - fi - exec "$QEMU_PROG" $QEMU_OPTIONS "$@" - ) -} - -_qemu_img_wrapper() -{ - (exec "$QEMU_IMG_PROG" $QEMU_IMG_OPTIONS "$@") -} - -_qemu_io_wrapper() -{ - local VALGRIND_LOGFILE="${TEST_DIR}"/$$.valgrind - local QEMU_IO_ARGS="$QEMU_IO_OPTIONS" - if [ "$IMGOPTSSYNTAX" = "true" ]; then - QEMU_IO_ARGS="--image-opts $QEMU_IO_ARGS" - if [ -n "$IMGKEYSECRET" ]; then - QEMU_IO_ARGS="--object secret,id=keysec0,data=$IMGKEYSECRET $QEMU_IO_ARGS" - fi - fi - local RETVAL - ( - if [ "${VALGRIND_QEMU}" == "y" ]; then - exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@" - else - exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@" - fi - ) - RETVAL=$? - if [ "${VALGRIND_QEMU}" == "y" ]; then - if [ $RETVAL == 99 ]; then - cat "${VALGRIND_LOGFILE}" - fi - rm -f "${VALGRIND_LOGFILE}" - fi - (exit $RETVAL) -} - -_qemu_nbd_wrapper() -{ - ( - echo $BASHPID > "${QEMU_TEST_DIR}/qemu-nbd.pid" - exec "$QEMU_NBD_PROG" $QEMU_NBD_OPTIONS "$@" - ) -} - -_qemu_vxhs_wrapper() -{ - ( - echo $BASHPID > "${TEST_DIR}/qemu-vxhs.pid" - exec "$QEMU_VXHS_PROG" $QEMU_VXHS_OPTIONS "$@" - ) -} - -export QEMU=_qemu_wrapper -export QEMU_IMG=_qemu_img_wrapper -export QEMU_IO=_qemu_io_wrapper -export QEMU_NBD=_qemu_nbd_wrapper -export QEMU_VXHS=_qemu_vxhs_wrapper - QEMU_IMG_EXTRA_ARGS= if [ "$IMGOPTSSYNTAX" = "true" ]; then QEMU_IMG_EXTRA_ARGS="--image-opts $QEMU_IMG_EXTRA_ARGS" @@ -115,8 +50,8 @@ fi export QEMU_IMG_EXTRA_ARGS -default_machine=$($QEMU -machine help | sed -n '/(default)/ s/ .*//p') -default_alias_machine=$($QEMU -machine help | \ +default_machine=$($QEMU_PROG -machine help | sed -n '/(default)/ s/ .*//p') +default_alias_machine=$($QEMU_PROG -machine help | \ sed -n "/(alias of $default_machine)/ { s/ .*//p; q; }") if [[ "$default_alias_machine" ]]; then default_machine="$default_alias_machine" diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 6f6e03366f..e7f74b4dbd 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -50,6 +50,71 @@ then fi fi +_qemu_wrapper() +{ + ( + if [ -n "${QEMU_NEED_PID}" ]; then + echo $BASHPID > "${QEMU_TEST_DIR}/qemu-${_QEMU_HANDLE}.pid" + fi + exec "$QEMU_PROG" $QEMU_OPTIONS "$@" + ) +} + +_qemu_img_wrapper() +{ + (exec "$QEMU_IMG_PROG" $QEMU_IMG_OPTIONS "$@") +} + +_qemu_io_wrapper() +{ + local VALGRIND_LOGFILE="${TEST_DIR}"/$$.valgrind + local QEMU_IO_ARGS="$QEMU_IO_OPTIONS" + if [ "$IMGOPTSSYNTAX" = "true" ]; then + QEMU_IO_ARGS="--image-opts $QEMU_IO_ARGS" + if [ -n "$IMGKEYSECRET" ]; then + QEMU_IO_ARGS="--object secret,id=keysec0,data=$IMGKEYSECRET $QEMU_IO_ARGS" + fi + fi + local RETVAL + ( + if [ "${VALGRIND_QEMU}" == "y" ]; then + exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@" + else + exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@" + fi + ) + RETVAL=$? + if [ "${VALGRIND_QEMU}" == "y" ]; then + if [ $RETVAL == 99 ]; then + cat "${VALGRIND_LOGFILE}" + fi + rm -f "${VALGRIND_LOGFILE}" + fi + (exit $RETVAL) +} + +_qemu_nbd_wrapper() +{ + ( + echo $BASHPID > "${QEMU_TEST_DIR}/qemu-nbd.pid" + exec "$QEMU_NBD_PROG" $QEMU_NBD_OPTIONS "$@" + ) +} + +_qemu_vxhs_wrapper() +{ + ( + echo $BASHPID > "${TEST_DIR}/qemu-vxhs.pid" + exec "$QEMU_VXHS_PROG" $QEMU_VXHS_OPTIONS "$@" + ) +} + +export QEMU=_qemu_wrapper +export QEMU_IMG=_qemu_img_wrapper +export QEMU_IO=_qemu_io_wrapper +export QEMU_NBD=_qemu_nbd_wrapper +export QEMU_VXHS=_qemu_vxhs_wrapper + if [ "$IMGOPTSSYNTAX" = "true" ]; then DRIVER="driver=$IMGFMT" if [ "$IMGFMT" = "luks" ]; then From 3817ce03bf3373065c25e389ecb7f23a511ebcdc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Sep 2017 16:44:55 +0200 Subject: [PATCH 28/54] qemu-iotests: do not include common.rc in "check" It only provides functions used by the test programs. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/check | 6 ------ tests/qemu-iotests/common.rc | 13 +++++-------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 03871d0681..e39680ade2 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -72,12 +72,6 @@ then _init_error "failed to source common.config" fi -# we need common.rc -if ! . "$source_iotests/common.rc" -then - _init_error "failed to source common.rc" -fi - # we need common . "$source_iotests/common" diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index e7f74b4dbd..20f6821a69 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -40,14 +40,11 @@ poke_file() printf "$3" | dd "of=$1" bs=1 "seek=$2" conv=notrunc &>/dev/null } -# we need common.config -if [ "$iam" != "check" ] -then - if ! . ./common.config - then - echo "$iam: failed to source common.config" - exit 1 - fi + +if ! . ./common.config + then + echo "$iam: failed to source common.config" + exit 1 fi _qemu_wrapper() From cce293a29459e1fd5f74e55d87dc9c25991da578 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Sep 2017 16:44:56 +0200 Subject: [PATCH 29/54] qemu-iotests: disintegrate more parts of common.config Split "check" parts from tests part. For the directory setup, the actual computation of directories goes in "check", while the sanity checks go in the tests. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/common | 24 ++++++++++++++++ tests/qemu-iotests/common.config | 49 -------------------------------- tests/qemu-iotests/common.qemu | 1 + tests/qemu-iotests/common.rc | 25 ++++++++++++++++ 4 files changed, 50 insertions(+), 49 deletions(-) diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index abacc24114..ee313af92f 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -48,6 +48,14 @@ set_prog_path() fi } +if [ -z "$TEST_DIR" ]; then + TEST_DIR=`pwd`/scratch +fi + +if [ ! -e "$TEST_DIR" ]; then + mkdir "$TEST_DIR" +fi + diff="diff -u" verbose=false debug=false @@ -440,6 +448,13 @@ if [ "$IMGFMT" == "luks" ] && ! (echo "$IMGOPTS" | grep "iter-time=" > /dev/null IMGOPTS=$(_optstr_add "$IMGOPTS" "iter-time=10") fi +if [ -z "$SAMPLE_IMG_DIR" ]; then + SAMPLE_IMG_DIR="$source_iotests/sample_images" +fi + +export TEST_DIR +export SAMPLE_IMG_DIR + if [ -s $tmp.list ] then # found some valid test numbers ... this is good @@ -524,3 +539,12 @@ if [ -x "$build_iotests/socket_scm_helper" ] then export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper" fi + +default_machine=$($QEMU_PROG -machine help | sed -n '/(default)/ s/ .*//p') +default_alias_machine=$($QEMU_PROG -machine help | \ + sed -n "/(alias of $default_machine)/ { s/ .*//p; q; }") +if [[ "$default_alias_machine" ]]; then + default_machine="$default_alias_machine" +fi + +export QEMU_DEFAULT_MACHINE="$default_machine" diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index c7f0a7a7e0..cdcda54546 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -26,8 +26,6 @@ arch=`uname -m` export PWD=`pwd` -export _QEMU_HANDLE=0 - # make sure we have a standard umask umask 022 @@ -40,52 +38,5 @@ _optstr_add() fi } -QEMU_IMG_EXTRA_ARGS= -if [ "$IMGOPTSSYNTAX" = "true" ]; then - QEMU_IMG_EXTRA_ARGS="--image-opts $QEMU_IMG_EXTRA_ARGS" - if [ -n "$IMGKEYSECRET" ]; then - QEMU_IMG_EXTRA_ARGS="--object secret,id=keysec0,data=$IMGKEYSECRET $QEMU_IMG_EXTRA_ARGS" - fi -fi -export QEMU_IMG_EXTRA_ARGS - - -default_machine=$($QEMU_PROG -machine help | sed -n '/(default)/ s/ .*//p') -default_alias_machine=$($QEMU_PROG -machine help | \ - sed -n "/(alias of $default_machine)/ { s/ .*//p; q; }") -if [[ "$default_alias_machine" ]]; then - default_machine="$default_alias_machine" -fi - -export QEMU_DEFAULT_MACHINE="$default_machine" - -if [ -z "$TEST_DIR" ]; then - TEST_DIR=`pwd`/scratch -fi - -QEMU_TEST_DIR="${TEST_DIR}" - -if [ ! -e "$TEST_DIR" ]; then - mkdir "$TEST_DIR" -fi - -if [ ! -d "$TEST_DIR" ]; then - echo "common.config: Error: \$TEST_DIR ($TEST_DIR) is not a directory" - exit 1 -fi - -export TEST_DIR - -if [ -z "$SAMPLE_IMG_DIR" ]; then - SAMPLE_IMG_DIR="$source_iotests/sample_images" -fi - -if [ ! -d "$SAMPLE_IMG_DIR" ]; then - echo "common.config: Error: \$SAMPLE_IMG_DIR ($SAMPLE_IMG_DIR) is not a directory" - exit 1 -fi - -export SAMPLE_IMG_DIR - # make sure this script returns success true diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu index 7645f1dc72..9f9aecc9df 100644 --- a/tests/qemu-iotests/common.qemu +++ b/tests/qemu-iotests/common.qemu @@ -31,6 +31,7 @@ QEMU_FIFO_IN="${QEMU_TEST_DIR}/qmp-in-$$" QEMU_FIFO_OUT="${QEMU_TEST_DIR}/qmp-out-$$" QEMU_HANDLE=0 +export _QEMU_HANDLE=0 # If bash version is >= 4.1, these will be overwritten and dynamic # file descriptor values assigned. diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 20f6821a69..f4dc0104e6 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -114,6 +114,10 @@ export QEMU_VXHS=_qemu_vxhs_wrapper if [ "$IMGOPTSSYNTAX" = "true" ]; then DRIVER="driver=$IMGFMT" + QEMU_IMG_EXTRA_ARGS="--image-opts $QEMU_IMG_EXTRA_ARGS" + if [ -n "$IMGKEYSECRET" ]; then + QEMU_IMG_EXTRA_ARGS="--object secret,id=keysec0,data=$IMGKEYSECRET $QEMU_IMG_EXTRA_ARGS" + fi if [ "$IMGFMT" = "luks" ]; then DRIVER="$DRIVER,key-secret=keysec0" fi @@ -133,6 +137,7 @@ if [ "$IMGOPTSSYNTAX" = "true" ]; then TEST_IMG="$DRIVER,file.driver=$IMGPROTO,file.filename=$TEST_DIR/t.$IMGFMT" fi else + QEMU_IMG_EXTRA_ARGS= if [ "$IMGPROTO" = "file" ]; then TEST_IMG=$TEST_DIR/t.$IMGFMT elif [ "$IMGPROTO" = "nbd" ]; then @@ -153,6 +158,26 @@ else fi ORIG_TEST_IMG="$TEST_IMG" +if [ -z "$TEST_DIR" ]; then + TEST_DIR=`pwd`/scratch +fi + +QEMU_TEST_DIR="${TEST_DIR}" + +if [ ! -e "$TEST_DIR" ]; then + mkdir "$TEST_DIR" +fi + +if [ ! -d "$TEST_DIR" ]; then + echo "common.config: Error: \$TEST_DIR ($TEST_DIR) is not a directory" + exit 1 +fi + +if [ ! -d "$SAMPLE_IMG_DIR" ]; then + echo "common.config: Error: \$SAMPLE_IMG_DIR ($SAMPLE_IMG_DIR) is not a directory" + exit 1 +fi + _use_sample_img() { SAMPLE_IMG_FILE="${1%\.bz2}" From 8f4dcaba9bed36d067f2f2da1e2ceb9d1b50f2b2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Sep 2017 16:44:57 +0200 Subject: [PATCH 30/54] qemu-iotests: fix uninitialized variable The variable is used in "common" but defined only after the file is sourced. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/check | 2 -- tests/qemu-iotests/common | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index e39680ade2..2d2ef687ad 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -77,8 +77,6 @@ fi TIMESTAMP_FILE=check.time-$IMGPROTO-$IMGFMT -tmp="${TEST_DIR}"/$$ - _wallclock() { date "+%H %M %S" | awk '{ print $1*3600 + $2*60 + $3 }' diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index ee313af92f..365d3c4349 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -67,6 +67,8 @@ sortme=false expunge=true have_test_arg=false cachemode=false + +tmp="${TEST_DIR}"/$$ rm -f $tmp.list $tmp.tmp $tmp.sed export IMGFMT=raw From 4e670492efe29b809f5b1d88eee2adcdf70d8d13 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Sep 2017 16:44:58 +0200 Subject: [PATCH 31/54] qemu-iotests: get rid of $iam The variable is almost unused, and one of the two uses is actually uninitialized. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/check | 5 +---- tests/qemu-iotests/common.rc | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 2d2ef687ad..010bf48e92 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -30,12 +30,9 @@ interrupt=true # by default don't output timestamps timestamp=${TIMESTAMP:=false} -# generic initialization -iam=check - _init_error() { - echo "$iam: $1" >&2 + echo "check: $1" >&2 exit 1 } diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index f4dc0104e6..0e8a33c696 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -43,7 +43,7 @@ poke_file() if ! . ./common.config then - echo "$iam: failed to source common.config" + echo "$0: failed to source common.config" exit 1 fi From 09d653e6176a5e92f5d2a3b2270f386d9ce6a544 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Sep 2017 16:44:59 +0200 Subject: [PATCH 32/54] qemu-iotests: merge "check" and "common" "check" is full of qemu-iotests--specific details. Separating it from "common" does not make much sense anymore. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/check | 533 +++++++++++++++++++++++++++++++++++- tests/qemu-iotests/common | 552 -------------------------------------- 2 files changed, 531 insertions(+), 554 deletions(-) delete mode 100644 tests/qemu-iotests/common diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 010bf48e92..176cb8e937 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -69,8 +69,537 @@ then _init_error "failed to source common.config" fi -# we need common -. "$source_iotests/common" +_full_imgfmt_details() +{ + if [ -n "$IMGOPTS" ]; then + echo "$IMGFMT ($IMGOPTS)" + else + echo "$IMGFMT" + fi +} + +_full_platform_details() +{ + os=`uname -s` + host=`hostname -s` + kernel=`uname -r` + platform=`uname -m` + echo "$os/$platform $host $kernel" +} + +# $1 = prog to look for +set_prog_path() +{ + p=`command -v $1 2> /dev/null` + if [ -n "$p" -a -x "$p" ]; then + realpath -- "$(type -p "$p")" + else + return 1 + fi +} + +if [ -z "$TEST_DIR" ]; then + TEST_DIR=`pwd`/scratch +fi + +if [ ! -e "$TEST_DIR" ]; then + mkdir "$TEST_DIR" +fi + +diff="diff -u" +verbose=false +debug=false +group=false +xgroup=false +imgopts=false +showme=false +sortme=false +expunge=true +have_test_arg=false +cachemode=false + +tmp="${TEST_DIR}"/$$ +rm -f $tmp.list $tmp.tmp $tmp.sed + +export IMGFMT=raw +export IMGFMT_GENERIC=true +export IMGPROTO=file +export IMGOPTS="" +export CACHEMODE="writeback" +export QEMU_IO_OPTIONS="" +export QEMU_IO_OPTIONS_NO_FMT="" +export CACHEMODE_IS_DEFAULT=true +export QEMU_OPTIONS="-nodefaults -machine accel=qtest" +export VALGRIND_QEMU= +export IMGKEYSECRET= +export IMGOPTSSYNTAX=false + +for r +do + + if $group + then + # arg after -g + group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{ +s/ .*//p +}'` + if [ -z "$group_list" ] + then + echo "Group \"$r\" is empty or not defined?" + exit 1 + fi + [ ! -s $tmp.list ] && touch $tmp.list + for t in $group_list + do + if grep -s "^$t\$" $tmp.list >/dev/null + then + : + else + echo "$t" >>$tmp.list + fi + done + group=false + continue + + elif $xgroup + then + # arg after -x + # Populate $tmp.list with all tests + awk '/^[0-9]{3,}/ {print $1}' "${source_iotests}/group" > $tmp.list 2>/dev/null + group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{ +s/ .*//p +}'` + if [ -z "$group_list" ] + then + echo "Group \"$r\" is empty or not defined?" + exit 1 + fi + numsed=0 + rm -f $tmp.sed + for t in $group_list + do + if [ $numsed -gt 100 ] + then + sed -f $tmp.sed <$tmp.list >$tmp.tmp + mv $tmp.tmp $tmp.list + numsed=0 + rm -f $tmp.sed + fi + echo "/^$t\$/d" >>$tmp.sed + numsed=`expr $numsed + 1` + done + sed -f $tmp.sed <$tmp.list >$tmp.tmp + mv $tmp.tmp $tmp.list + xgroup=false + continue + + elif $imgopts + then + IMGOPTS="$r" + imgopts=false + continue + elif $cachemode + then + CACHEMODE="$r" + CACHEMODE_IS_DEFAULT=false + cachemode=false + continue + fi + + xpand=true + case "$r" + in + + -\? | -h | --help) # usage + echo "Usage: $0 [options] [testlist]"' + +common options + -v verbose + -d debug + +image format options + -raw test raw (default) + -bochs test bochs + -cloop test cloop + -parallels test parallels + -qcow test qcow + -qcow2 test qcow2 + -qed test qed + -vdi test vdi + -vpc test vpc + -vhdx test vhdx + -vmdk test vmdk + -luks test luks + +image protocol options + -file test file (default) + -rbd test rbd + -sheepdog test sheepdog + -nbd test nbd + -ssh test ssh + -nfs test nfs + -vxhs test vxhs + +other options + -xdiff graphical mode diff + -nocache use O_DIRECT on backing file + -misalign misalign memory allocations + -n show me, do not run tests + -o options -o options to pass to qemu-img create/convert + -T output timestamps + -c mode cache mode + +testlist options + -g group[,group...] include tests from these groups + -x group[,group...] exclude tests from these groups + NNN include test NNN + NNN-NNN include test range (eg. 012-021) +' + exit 0 + ;; + + -raw) + IMGFMT=raw + xpand=false + ;; + + -bochs) + IMGFMT=bochs + IMGFMT_GENERIC=false + xpand=false + ;; + + -cloop) + IMGFMT=cloop + IMGFMT_GENERIC=false + xpand=false + ;; + + -parallels) + IMGFMT=parallels + IMGFMT_GENERIC=false + xpand=false + ;; + + -qcow) + IMGFMT=qcow + xpand=false + ;; + + -qcow2) + IMGFMT=qcow2 + xpand=false + ;; + + -luks) + IMGOPTSSYNTAX=true + IMGFMT=luks + IMGKEYSECRET=123456 + xpand=false + ;; + + -qed) + IMGFMT=qed + xpand=false + ;; + + -vdi) + IMGFMT=vdi + xpand=false + ;; + + -vmdk) + IMGFMT=vmdk + xpand=false + ;; + + -vpc) + IMGFMT=vpc + xpand=false + ;; + + -vhdx) + IMGFMT=vhdx + xpand=false + ;; + + -file) + IMGPROTO=file + xpand=false + ;; + + -rbd) + IMGPROTO=rbd + xpand=false + ;; + + -sheepdog) + IMGPROTO=sheepdog + xpand=false + ;; + + -nbd) + IMGPROTO=nbd + xpand=false + ;; + + -vxhs) + IMGPROTO=vxhs + xpand=false + ;; + + -ssh) + IMGPROTO=ssh + xpand=false + ;; + + -nfs) + IMGPROTO=nfs + xpand=false + ;; + + -nocache) + CACHEMODE="none" + CACHEMODE_IS_DEFAULT=false + xpand=false + ;; + + -misalign) + QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --misalign" + xpand=false + ;; + + -valgrind) + VALGRIND_QEMU='y' + xpand=false + ;; + + -g) # -g group ... pick from group file + group=true + xpand=false + ;; + + -xdiff) # graphical diff mode + xpand=false + + if [ ! -z "$DISPLAY" ] + then + command -v xdiff >/dev/null 2>&1 && diff=xdiff + command -v gdiff >/dev/null 2>&1 && diff=gdiff + command -v tkdiff >/dev/null 2>&1 && diff=tkdiff + command -v xxdiff >/dev/null 2>&1 && diff=xxdiff + fi + ;; + + -n) # show me, don't do it + showme=true + xpand=false + ;; + -o) + imgopts=true + xpand=false + ;; + -c) + cachemode=true + xpand=false + ;; + -T) # turn on timestamp output + timestamp=true + xpand=false + ;; + + -v) + verbose=true + xpand=false + ;; + -d) + debug=true + xpand=false + ;; + -x) # -x group ... exclude from group file + xgroup=true + xpand=false + ;; + '[0-9][0-9][0-9] [0-9][0-9][0-9][0-9]') + echo "No tests?" + status=1 + exit $status + ;; + + [0-9]*-[0-9]*) + eval `echo $r | sed -e 's/^/start=/' -e 's/-/ end=/'` + ;; + + [0-9]*-) + eval `echo $r | sed -e 's/^/start=/' -e 's/-//'` + end=`echo [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] | sed -e 's/\[0-9]//g' -e 's/ *$//' -e 's/.* //'` + if [ -z "$end" ] + then + echo "No tests in range \"$r\"?" + status=1 + exit $status + fi + ;; + + *) + start=$r + end=$r + ;; + + esac + + # get rid of leading 0s as can be interpreted as octal + start=`echo $start | sed 's/^0*//'` + end=`echo $end | sed 's/^0*//'` + + if $xpand + then + have_test_arg=true + awk /dev/null + then + # in group file ... OK + echo $id >>$tmp.list + else + if [ -f expunged ] && $expunge && egrep "^$id([ ]|\$)" expunged >/dev/null + then + # expunged ... will be reported, but not run, later + echo $id >>$tmp.list + else + # oops + if [ "$start" == "$end" -a "$id" == "$end" ] + then + echo "$id - unknown test" + exit 1 + else + echo "$id - unknown test, ignored" + fi + fi + fi + done || exit 1 + fi + +done + +# Set qemu-io cache mode with $CACHEMODE we have +QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --cache $CACHEMODE" + +QEMU_IO_OPTIONS_NO_FMT="$QEMU_IO_OPTIONS" +if [ "$IMGOPTSSYNTAX" != "true" ]; then + QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS -f $IMGFMT" +fi + +# Set default options for qemu-img create -o if they were not specified +if [ "$IMGFMT" == "qcow2" ] && ! (echo "$IMGOPTS" | grep "compat=" > /dev/null); then + IMGOPTS=$(_optstr_add "$IMGOPTS" "compat=1.1") +fi +if [ "$IMGFMT" == "luks" ] && ! (echo "$IMGOPTS" | grep "iter-time=" > /dev/null); then + IMGOPTS=$(_optstr_add "$IMGOPTS" "iter-time=10") +fi + +if [ -z "$SAMPLE_IMG_DIR" ]; then + SAMPLE_IMG_DIR="$source_iotests/sample_images" +fi + +export TEST_DIR +export SAMPLE_IMG_DIR + +if [ -s $tmp.list ] +then + # found some valid test numbers ... this is good + : +else + if $have_test_arg + then + # had test numbers, but none in group file ... do nothing + touch $tmp.list + else + # no test numbers, do everything from group file + sed -n -e '/^[0-9][0-9][0-9]*/s/[ ].*//p' <"$source_iotests/group" >$tmp.list + fi +fi + +# should be sort -n, but this did not work for Linux when this +# was ported from IRIX +# +list=`sort $tmp.list` +rm -f $tmp.list $tmp.tmp $tmp.sed + +if [ -z "$QEMU_PROG" ] +then + if [ -x "$build_iotests/qemu" ]; then + export QEMU_PROG="$build_iotests/qemu" + elif [ -x "$build_root/$arch-softmmu/qemu-system-$arch" ]; then + export QEMU_PROG="$build_root/$arch-softmmu/qemu-system-$arch" + else + pushd "$build_root" > /dev/null + for binary in *-softmmu/qemu-system-* + do + if [ -x "$binary" ] + then + export QEMU_PROG="$build_root/$binary" + break + fi + done + popd > /dev/null + [ "$QEMU_PROG" = "" ] && _init_error "qemu not found" + fi +fi +export QEMU_PROG=$(realpath -- "$(type -p "$QEMU_PROG")") + +if [ -z "$QEMU_IMG_PROG" ]; then + if [ -x "$build_iotests/qemu-img" ]; then + export QEMU_IMG_PROG="$build_iotests/qemu-img" + elif [ -x "$build_root/qemu-img" ]; then + export QEMU_IMG_PROG="$build_root/qemu-img" + else + _init_error "qemu-img not found" + fi +fi +export QEMU_IMG_PROG=$(realpath -- "$(type -p "$QEMU_IMG_PROG")") + +if [ -z "$QEMU_IO_PROG" ]; then + if [ -x "$build_iotests/qemu-io" ]; then + export QEMU_IO_PROG="$build_iotests/qemu-io" + elif [ -x "$build_root/qemu-io" ]; then + export QEMU_IO_PROG="$build_root/qemu-io" + else + _init_error "qemu-io not found" + fi +fi +export QEMU_IO_PROG=$(realpath -- "$(type -p "$QEMU_IO_PROG")") + +if [ -z $QEMU_NBD_PROG ]; then + if [ -x "$build_iotests/qemu-nbd" ]; then + export QEMU_NBD_PROG="$build_iotests/qemu-nbd" + elif [ -x "$build_root/qemu-nbd" ]; then + export QEMU_NBD_PROG="$build_root/qemu-nbd" + else + _init_error "qemu-nbd not found" + fi +fi +export QEMU_NBD_PROG=$(realpath -- "$(type -p "$QEMU_NBD_PROG")") + +if [ -z "$QEMU_VXHS_PROG" ]; then + export QEMU_VXHS_PROG="`set_prog_path qnio_server`" +fi + +if [ -x "$build_iotests/socket_scm_helper" ] +then + export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper" +fi + +default_machine=$($QEMU_PROG -machine help | sed -n '/(default)/ s/ .*//p') +default_alias_machine=$($QEMU_PROG -machine help | \ + sed -n "/(alias of $default_machine)/ { s/ .*//p; q; }") +if [[ "$default_alias_machine" ]]; then + default_machine="$default_alias_machine" +fi + +export QEMU_DEFAULT_MACHINE="$default_machine" TIMESTAMP_FILE=check.time-$IMGPROTO-$IMGFMT diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common deleted file mode 100644 index 365d3c4349..0000000000 --- a/tests/qemu-iotests/common +++ /dev/null @@ -1,552 +0,0 @@ -#!/bin/bash -# -# Copyright (C) 2009 Red Hat, Inc. -# Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it would be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# -# -# common procedures for QA scripts -# - -_full_imgfmt_details() -{ - if [ -n "$IMGOPTS" ]; then - echo "$IMGFMT ($IMGOPTS)" - else - echo "$IMGFMT" - fi -} - -_full_platform_details() -{ - os=`uname -s` - host=`hostname -s` - kernel=`uname -r` - platform=`uname -m` - echo "$os/$platform $host $kernel" -} - -# $1 = prog to look for -set_prog_path() -{ - p=`command -v $1 2> /dev/null` - if [ -n "$p" -a -x "$p" ]; then - realpath -- "$(type -p "$p")" - else - return 1 - fi -} - -if [ -z "$TEST_DIR" ]; then - TEST_DIR=`pwd`/scratch -fi - -if [ ! -e "$TEST_DIR" ]; then - mkdir "$TEST_DIR" -fi - -diff="diff -u" -verbose=false -debug=false -group=false -xgroup=false -imgopts=false -showme=false -sortme=false -expunge=true -have_test_arg=false -cachemode=false - -tmp="${TEST_DIR}"/$$ -rm -f $tmp.list $tmp.tmp $tmp.sed - -export IMGFMT=raw -export IMGFMT_GENERIC=true -export IMGPROTO=file -export IMGOPTS="" -export CACHEMODE="writeback" -export QEMU_IO_OPTIONS="" -export QEMU_IO_OPTIONS_NO_FMT="" -export CACHEMODE_IS_DEFAULT=true -export QEMU_OPTIONS="-nodefaults -machine accel=qtest" -export VALGRIND_QEMU= -export IMGKEYSECRET= -export IMGOPTSSYNTAX=false - -for r -do - - if $group - then - # arg after -g - group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{ -s/ .*//p -}'` - if [ -z "$group_list" ] - then - echo "Group \"$r\" is empty or not defined?" - exit 1 - fi - [ ! -s $tmp.list ] && touch $tmp.list - for t in $group_list - do - if grep -s "^$t\$" $tmp.list >/dev/null - then - : - else - echo "$t" >>$tmp.list - fi - done - group=false - continue - - elif $xgroup - then - # arg after -x - # Populate $tmp.list with all tests - awk '/^[0-9]{3,}/ {print $1}' "${source_iotests}/group" > $tmp.list 2>/dev/null - group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{ -s/ .*//p -}'` - if [ -z "$group_list" ] - then - echo "Group \"$r\" is empty or not defined?" - exit 1 - fi - numsed=0 - rm -f $tmp.sed - for t in $group_list - do - if [ $numsed -gt 100 ] - then - sed -f $tmp.sed <$tmp.list >$tmp.tmp - mv $tmp.tmp $tmp.list - numsed=0 - rm -f $tmp.sed - fi - echo "/^$t\$/d" >>$tmp.sed - numsed=`expr $numsed + 1` - done - sed -f $tmp.sed <$tmp.list >$tmp.tmp - mv $tmp.tmp $tmp.list - xgroup=false - continue - - elif $imgopts - then - IMGOPTS="$r" - imgopts=false - continue - elif $cachemode - then - CACHEMODE="$r" - CACHEMODE_IS_DEFAULT=false - cachemode=false - continue - fi - - xpand=true - case "$r" - in - - -\? | -h | --help) # usage - echo "Usage: $0 [options] [testlist]"' - -common options - -v verbose - -d debug - -image format options - -raw test raw (default) - -bochs test bochs - -cloop test cloop - -parallels test parallels - -qcow test qcow - -qcow2 test qcow2 - -qed test qed - -vdi test vdi - -vpc test vpc - -vhdx test vhdx - -vmdk test vmdk - -luks test luks - -image protocol options - -file test file (default) - -rbd test rbd - -sheepdog test sheepdog - -nbd test nbd - -ssh test ssh - -nfs test nfs - -vxhs test vxhs - -other options - -xdiff graphical mode diff - -nocache use O_DIRECT on backing file - -misalign misalign memory allocations - -n show me, do not run tests - -o options -o options to pass to qemu-img create/convert - -T output timestamps - -c mode cache mode - -testlist options - -g group[,group...] include tests from these groups - -x group[,group...] exclude tests from these groups - NNN include test NNN - NNN-NNN include test range (eg. 012-021) -' - exit 0 - ;; - - -raw) - IMGFMT=raw - xpand=false - ;; - - -bochs) - IMGFMT=bochs - IMGFMT_GENERIC=false - xpand=false - ;; - - -cloop) - IMGFMT=cloop - IMGFMT_GENERIC=false - xpand=false - ;; - - -parallels) - IMGFMT=parallels - IMGFMT_GENERIC=false - xpand=false - ;; - - -qcow) - IMGFMT=qcow - xpand=false - ;; - - -qcow2) - IMGFMT=qcow2 - xpand=false - ;; - - -luks) - IMGOPTSSYNTAX=true - IMGFMT=luks - IMGKEYSECRET=123456 - xpand=false - ;; - - -qed) - IMGFMT=qed - xpand=false - ;; - - -vdi) - IMGFMT=vdi - xpand=false - ;; - - -vmdk) - IMGFMT=vmdk - xpand=false - ;; - - -vpc) - IMGFMT=vpc - xpand=false - ;; - - -vhdx) - IMGFMT=vhdx - xpand=false - ;; - - -file) - IMGPROTO=file - xpand=false - ;; - - -rbd) - IMGPROTO=rbd - xpand=false - ;; - - -sheepdog) - IMGPROTO=sheepdog - xpand=false - ;; - - -nbd) - IMGPROTO=nbd - xpand=false - ;; - - -vxhs) - IMGPROTO=vxhs - xpand=false - ;; - - -ssh) - IMGPROTO=ssh - xpand=false - ;; - - -nfs) - IMGPROTO=nfs - xpand=false - ;; - - -nocache) - CACHEMODE="none" - CACHEMODE_IS_DEFAULT=false - xpand=false - ;; - - -misalign) - QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --misalign" - xpand=false - ;; - - -valgrind) - VALGRIND_QEMU='y' - xpand=false - ;; - - -g) # -g group ... pick from group file - group=true - xpand=false - ;; - - -xdiff) # graphical diff mode - xpand=false - - if [ ! -z "$DISPLAY" ] - then - command -v xdiff >/dev/null 2>&1 && diff=xdiff - command -v gdiff >/dev/null 2>&1 && diff=gdiff - command -v tkdiff >/dev/null 2>&1 && diff=tkdiff - command -v xxdiff >/dev/null 2>&1 && diff=xxdiff - fi - ;; - - -n) # show me, don't do it - showme=true - xpand=false - ;; - -o) - imgopts=true - xpand=false - ;; - -c) - cachemode=true - xpand=false - ;; - -T) # turn on timestamp output - timestamp=true - xpand=false - ;; - - -v) - verbose=true - xpand=false - ;; - -d) - debug=true - xpand=false - ;; - -x) # -x group ... exclude from group file - xgroup=true - xpand=false - ;; - '[0-9][0-9][0-9] [0-9][0-9][0-9][0-9]') - echo "No tests?" - status=1 - exit $status - ;; - - [0-9]*-[0-9]*) - eval `echo $r | sed -e 's/^/start=/' -e 's/-/ end=/'` - ;; - - [0-9]*-) - eval `echo $r | sed -e 's/^/start=/' -e 's/-//'` - end=`echo [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] | sed -e 's/\[0-9]//g' -e 's/ *$//' -e 's/.* //'` - if [ -z "$end" ] - then - echo "No tests in range \"$r\"?" - status=1 - exit $status - fi - ;; - - *) - start=$r - end=$r - ;; - - esac - - # get rid of leading 0s as can be interpreted as octal - start=`echo $start | sed 's/^0*//'` - end=`echo $end | sed 's/^0*//'` - - if $xpand - then - have_test_arg=true - awk /dev/null - then - # in group file ... OK - echo $id >>$tmp.list - else - if [ -f expunged ] && $expunge && egrep "^$id([ ]|\$)" expunged >/dev/null - then - # expunged ... will be reported, but not run, later - echo $id >>$tmp.list - else - # oops - if [ "$start" == "$end" -a "$id" == "$end" ] - then - echo "$id - unknown test" - exit 1 - else - echo "$id - unknown test, ignored" - fi - fi - fi - done || exit 1 - fi - -done - -# Set qemu-io cache mode with $CACHEMODE we have -QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --cache $CACHEMODE" - -QEMU_IO_OPTIONS_NO_FMT="$QEMU_IO_OPTIONS" -if [ "$IMGOPTSSYNTAX" != "true" ]; then - QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS -f $IMGFMT" -fi - -# Set default options for qemu-img create -o if they were not specified -if [ "$IMGFMT" == "qcow2" ] && ! (echo "$IMGOPTS" | grep "compat=" > /dev/null); then - IMGOPTS=$(_optstr_add "$IMGOPTS" "compat=1.1") -fi -if [ "$IMGFMT" == "luks" ] && ! (echo "$IMGOPTS" | grep "iter-time=" > /dev/null); then - IMGOPTS=$(_optstr_add "$IMGOPTS" "iter-time=10") -fi - -if [ -z "$SAMPLE_IMG_DIR" ]; then - SAMPLE_IMG_DIR="$source_iotests/sample_images" -fi - -export TEST_DIR -export SAMPLE_IMG_DIR - -if [ -s $tmp.list ] -then - # found some valid test numbers ... this is good - : -else - if $have_test_arg - then - # had test numbers, but none in group file ... do nothing - touch $tmp.list - else - # no test numbers, do everything from group file - sed -n -e '/^[0-9][0-9][0-9]*/s/[ ].*//p' <"$source_iotests/group" >$tmp.list - fi -fi - -# should be sort -n, but this did not work for Linux when this -# was ported from IRIX -# -list=`sort $tmp.list` -rm -f $tmp.list $tmp.tmp $tmp.sed - -if [ -z "$QEMU_PROG" ] -then - if [ -x "$build_iotests/qemu" ]; then - export QEMU_PROG="$build_iotests/qemu" - elif [ -x "$build_root/$arch-softmmu/qemu-system-$arch" ]; then - export QEMU_PROG="$build_root/$arch-softmmu/qemu-system-$arch" - else - pushd "$build_root" > /dev/null - for binary in *-softmmu/qemu-system-* - do - if [ -x "$binary" ] - then - export QEMU_PROG="$build_root/$binary" - break - fi - done - popd > /dev/null - [ "$QEMU_PROG" = "" ] && _init_error "qemu not found" - fi -fi -export QEMU_PROG=$(realpath -- "$(type -p "$QEMU_PROG")") - -if [ -z "$QEMU_IMG_PROG" ]; then - if [ -x "$build_iotests/qemu-img" ]; then - export QEMU_IMG_PROG="$build_iotests/qemu-img" - elif [ -x "$build_root/qemu-img" ]; then - export QEMU_IMG_PROG="$build_root/qemu-img" - else - _init_error "qemu-img not found" - fi -fi -export QEMU_IMG_PROG=$(realpath -- "$(type -p "$QEMU_IMG_PROG")") - -if [ -z "$QEMU_IO_PROG" ]; then - if [ -x "$build_iotests/qemu-io" ]; then - export QEMU_IO_PROG="$build_iotests/qemu-io" - elif [ -x "$build_root/qemu-io" ]; then - export QEMU_IO_PROG="$build_root/qemu-io" - else - _init_error "qemu-io not found" - fi -fi -export QEMU_IO_PROG=$(realpath -- "$(type -p "$QEMU_IO_PROG")") - -if [ -z $QEMU_NBD_PROG ]; then - if [ -x "$build_iotests/qemu-nbd" ]; then - export QEMU_NBD_PROG="$build_iotests/qemu-nbd" - elif [ -x "$build_root/qemu-nbd" ]; then - export QEMU_NBD_PROG="$build_root/qemu-nbd" - else - _init_error "qemu-nbd not found" - fi -fi -export QEMU_NBD_PROG=$(realpath -- "$(type -p "$QEMU_NBD_PROG")") - -if [ -z "$QEMU_VXHS_PROG" ]; then - export QEMU_VXHS_PROG="`set_prog_path qnio_server`" -fi - -if [ -x "$build_iotests/socket_scm_helper" ] -then - export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper" -fi - -default_machine=$($QEMU_PROG -machine help | sed -n '/(default)/ s/ .*//p') -default_alias_machine=$($QEMU_PROG -machine help | \ - sed -n "/(alias of $default_machine)/ { s/ .*//p; q; }") -if [[ "$default_alias_machine" ]]; then - default_machine="$default_alias_machine" -fi - -export QEMU_DEFAULT_MACHINE="$default_machine" From 6858eba09ed69e64c8d05d4f4b8167b42a011b7f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 29 Jun 2017 19:32:21 +0200 Subject: [PATCH 33/54] block: Introduce BdrvChildRole.update_filename There is no good reason for bdrv_drop_intermediate() to know the active layer above the subchain it is operating on - even more so, because the assumption that there is a single active layer above it is not generally true. In order to prepare removal of the active parameter, use a BdrvChildRole callback to update the backing file string in the overlay image instead of directly calling bdrv_change_backing_file(). Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 33 ++++++++++++++++++++++++++++----- include/block/block_int.h | 6 ++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index ef5af81f66..ab354ba82a 100644 --- a/block.c +++ b/block.c @@ -981,6 +981,21 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, *child_flags = flags; } +static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, + const char *filename, Error **errp) +{ + BlockDriverState *parent = c->opaque; + int ret; + + ret = bdrv_change_backing_file(parent, filename, + base->drv ? base->drv->format_name : ""); + if (ret < 0) { + error_setg_errno(errp, ret, "Could not update backing file link"); + } + + return ret; +} + const BdrvChildRole child_backing = { .get_parent_desc = bdrv_child_get_parent_desc, .attach = bdrv_backing_attach, @@ -989,6 +1004,7 @@ const BdrvChildRole child_backing = { .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, .inactivate = bdrv_child_cb_inactivate, + .update_filename = bdrv_backing_update_filename, }; static int bdrv_open_flags(BlockDriverState *bs, int flags) @@ -3470,6 +3486,8 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, Error *local_err = NULL; int ret = -EIO; + bdrv_ref(top); + if (!top->drv || !base->drv) { goto exit; } @@ -3494,11 +3512,15 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, } /* success - we can delete the intermediate states, and link top->base */ - backing_file_str = backing_file_str ? backing_file_str : base->filename; - ret = bdrv_change_backing_file(new_top_bs, backing_file_str, - base->drv ? base->drv->format_name : ""); - if (ret) { - goto exit; + if (new_top_bs->backing->role->update_filename) { + backing_file_str = backing_file_str ? backing_file_str : base->filename; + ret = new_top_bs->backing->role->update_filename(new_top_bs->backing, + base, backing_file_str, + &local_err); + if (ret < 0) { + bdrv_set_backing_hd(new_top_bs, top, &error_abort); + goto exit; + } } bdrv_set_backing_hd(new_top_bs, base, &local_err); @@ -3510,6 +3532,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, ret = 0; exit: + bdrv_unref(top); return ret; } diff --git a/include/block/block_int.h b/include/block/block_int.h index 79366b94b5..7e8a206239 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -544,6 +544,12 @@ struct BdrvChildRole { void (*attach)(BdrvChild *child); void (*detach)(BdrvChild *child); + + /* Notifies the parent that the filename of its child has changed (e.g. + * because the direct child was removed from the backing chain), so that it + * can update its reference. */ + int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, + const char *filename, Error **errp); }; extern const BdrvChildRole child_file; From 61f09cea01391eaa23ea3bc78ab37a7d2da565fb Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 19 Sep 2017 16:22:54 +0200 Subject: [PATCH 34/54] commit: Support multiple roots above top node This changes the commit block job to support operation in a graph where there is more than a single active layer that references the top node. This involves inserting the commit filter node not only on the path between the given active node and the top node, but between the top node and all of its parents. On completion, bdrv_drop_intermediate() must consider all parents for updating the backing file link. These parents may be backing files themselves and as such read-only; reopen them temporarily if necessary. Previously this was achieved by the bdrv_reopen() calls in the commit block job that made overlay_bs read-write for the whole duration of the block job, even though write access is only needed on completion. Now that we consider all parents, overlay_bs is meaningless. It is left in place in this commit, but we'll remove it soon. Signed-off-by: Kevin Wolf --- block.c | 68 +++++++++++++++++++++++++++++--------------------- block/commit.c | 2 +- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/block.c b/block.c index ab354ba82a..1b098d4d09 100644 --- a/block.c +++ b/block.c @@ -985,14 +985,26 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, const char *filename, Error **errp) { BlockDriverState *parent = c->opaque; + int orig_flags = bdrv_get_flags(parent); int ret; + if (!(orig_flags & BDRV_O_RDWR)) { + ret = bdrv_reopen(parent, orig_flags | BDRV_O_RDWR, errp); + if (ret < 0) { + return ret; + } + } + ret = bdrv_change_backing_file(parent, filename, base->drv ? base->drv->format_name : ""); if (ret < 0) { error_setg_errno(errp, ret, "Could not update backing file link"); } + if (!(orig_flags & BDRV_O_RDWR)) { + bdrv_reopen(parent, orig_flags, NULL); + } + return ret; } @@ -3482,7 +3494,7 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs) int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base, const char *backing_file_str) { - BlockDriverState *new_top_bs = NULL; + BdrvChild *c, *next; Error *local_err = NULL; int ret = -EIO; @@ -3492,42 +3504,42 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, goto exit; } - new_top_bs = bdrv_find_overlay(active, top); - - if (new_top_bs == NULL) { - /* we could not find the image above 'top', this is an error */ - goto exit; - } - - /* special case of new_top_bs->backing->bs already pointing to base - nothing - * to do, no intermediate images */ - if (backing_bs(new_top_bs) == base) { - ret = 0; - goto exit; - } - /* Make sure that base is in the backing chain of top */ if (!bdrv_chain_contains(top, base)) { goto exit; } /* success - we can delete the intermediate states, and link top->base */ - if (new_top_bs->backing->role->update_filename) { - backing_file_str = backing_file_str ? backing_file_str : base->filename; - ret = new_top_bs->backing->role->update_filename(new_top_bs->backing, - base, backing_file_str, - &local_err); - if (ret < 0) { - bdrv_set_backing_hd(new_top_bs, top, &error_abort); + backing_file_str = backing_file_str ? backing_file_str : base->filename; + + QLIST_FOREACH_SAFE(c, &top->parents, next_parent, next) { + /* Check whether we are allowed to switch c from top to base */ + GSList *ignore_children = g_slist_prepend(NULL, c); + bdrv_check_update_perm(base, NULL, c->perm, c->shared_perm, + ignore_children, &local_err); + if (local_err) { + ret = -EPERM; + error_report_err(local_err); goto exit; } - } + g_slist_free(ignore_children); - bdrv_set_backing_hd(new_top_bs, base, &local_err); - if (local_err) { - ret = -EPERM; - error_report_err(local_err); - goto exit; + /* If so, update the backing file path in the image file */ + if (c->role->update_filename) { + ret = c->role->update_filename(c, base, backing_file_str, + &local_err); + if (ret < 0) { + bdrv_abort_perm_update(base); + error_report_err(local_err); + goto exit; + } + } + + /* Do the actual switch in the in-memory graph. + * Completes bdrv_check_update_perm() transaction internally. */ + bdrv_ref(base); + bdrv_replace_child(c, base); + bdrv_unref(top); } ret = 0; diff --git a/block/commit.c b/block/commit.c index 8f0e83578a..610e1cd8f5 100644 --- a/block/commit.c +++ b/block/commit.c @@ -350,7 +350,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, error_propagate(errp, local_err); goto fail; } - bdrv_set_backing_hd(overlay_bs, commit_top_bs, &local_err); + bdrv_replace_node(top, commit_top_bs, &local_err); if (local_err) { bdrv_unref(commit_top_bs); commit_top_bs = NULL; From 72538537d818d2870104448ba819b91dc70c241a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 29 Jun 2017 18:54:24 +0200 Subject: [PATCH 35/54] qemu-iotests: Allow QMP pretty printing in common.qemu QMP responses to certain commands can become quite long, which doesn't only make reading them hard, but also means that the maximum line length in patch emails can be exceeded. Allow tests to switch to QMP pretty printing, which results in more, but shorter lines. We also need to make sure to keep indentation in the response for this to work as expected. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- tests/qemu-iotests/common.qemu | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu index 9f9aecc9df..7b3052dc79 100644 --- a/tests/qemu-iotests/common.qemu +++ b/tests/qemu-iotests/common.qemu @@ -56,13 +56,13 @@ function _timed_wait_for() shift QEMU_STATUS[$h]=0 - while read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]} + while IFS= read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]} do if [ -z "${silent}" ]; then echo "${resp}" | _filter_testdir | _filter_qemu \ | _filter_qemu_io | _filter_qmp | _filter_hmp fi - grep -q "${*}" < <(echo ${resp}) + grep -q "${*}" < <(echo "${resp}") if [ $? -eq 0 ]; then return fi @@ -130,6 +130,7 @@ function _send_qemu_cmd() # $qemu_comm_method: set this variable to 'monitor' (case insensitive) # to use the QEMU HMP monitor for communication. # Otherwise, the default of QMP is used. +# $qmp_pretty: Set this variable to 'y' to enable QMP pretty printing. # $keep_stderr: Set this variable to 'y' to keep QEMU's stderr output on stderr. # If this variable is empty, stderr will be redirected to stdout. # Returns: @@ -146,7 +147,11 @@ function _launch_qemu() comm="-monitor stdio" else local qemu_comm_method="qmp" - comm="-monitor none -qmp stdio" + if [ "$qmp_pretty" = "y" ]; then + comm="-monitor none -qmp-pretty stdio" + else + comm="-monitor none -qmp stdio" + fi fi fifo_out=${QEMU_FIFO_OUT}_${_QEMU_HANDLE} @@ -193,6 +198,9 @@ function _launch_qemu() then # Don't print response, since it has version information in it silent=yes _timed_wait_for ${_QEMU_HANDLE} "capabilities" + if [ "$qmp_pretty" = "y" ]; then + silent=yes _timed_wait_for ${_QEMU_HANDLE} "^}" + fi fi QEMU_HANDLE=${_QEMU_HANDLE} let _QEMU_HANDLE++ From 7c61a4a3f90763527f212abc6d28e36df9e32202 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 29 Jun 2017 19:56:48 +0200 Subject: [PATCH 36/54] qemu-iotests: Test commit block job where top has two parents Signed-off-by: Kevin Wolf --- tests/qemu-iotests/191 | 153 +++++++ tests/qemu-iotests/191.out | 827 +++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 981 insertions(+) create mode 100755 tests/qemu-iotests/191 create mode 100644 tests/qemu-iotests/191.out diff --git a/tests/qemu-iotests/191 b/tests/qemu-iotests/191 new file mode 100755 index 0000000000..ac2b88fd78 --- /dev/null +++ b/tests/qemu-iotests/191 @@ -0,0 +1,153 @@ +#!/bin/bash +# +# Test commit block job where top has two parents +# +# Copyright (C) 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +MIG_SOCKET="${TEST_DIR}/migrate" + +_cleanup() +{ + rm -f "${TEST_IMG}.mid" + rm -f "${TEST_IMG}.ovl2" + rm -f "${TEST_IMG}.ovl3" + _cleanup_test_img + _cleanup_qemu +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt qcow2 +_unsupported_imgopts compat=0.10 +_supported_proto file +_supported_os Linux + +size=64M + +echo +echo === Preparing and starting VM === +echo + +TEST_IMG="${TEST_IMG}.base" _make_test_img $size +TEST_IMG="${TEST_IMG}.mid" _make_test_img -b "${TEST_IMG}.base" +_make_test_img -b "${TEST_IMG}.mid" +TEST_IMG="${TEST_IMG}.ovl2" _make_test_img -b "${TEST_IMG}.mid" + +$QEMU_IO -c 'write -P 0x55 1M 64k' "${TEST_IMG}.mid" | _filter_qemu_io + +qemu_comm_method="qmp" +qmp_pretty="y" + +_launch_qemu \ + -blockdev "driver=${IMGFMT},file.driver=file,file.filename=${TEST_IMG}.base,node-name=base" \ + -blockdev "driver=${IMGFMT},file.driver=file,file.filename=${TEST_IMG}.mid,node-name=mid,backing=base" \ + -blockdev "driver=${IMGFMT},file.driver=file,file.filename=${TEST_IMG},node-name=top,backing=mid" \ + -blockdev "driver=${IMGFMT},file.driver=file,file.filename=${TEST_IMG}.ovl2,node-name=top2,backing=mid" +h=$QEMU_HANDLE +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" '^}' + +echo +echo === Perform commit job === +echo + +_send_qemu_cmd $h \ + "{ 'execute': 'block-commit', + 'arguments': { 'job-id': 'commit0', + 'device': 'top', + 'base':'$TEST_IMG.base', + 'top': '$TEST_IMG.mid' } }" \ + "BLOCK_JOB_COMPLETED" +_send_qemu_cmd $h "" "^}" + +echo +echo === Check that both top and top2 point to base now === +echo + +_send_qemu_cmd $h "{ 'execute': 'query-named-block-nodes' }" "^}" | + _filter_generated_node_ids + +_send_qemu_cmd $h "{ 'execute': 'quit' }" "^}" +wait=1 _cleanup_qemu + +_img_info +TEST_IMG="$TEST_IMG.ovl2" _img_info + + +echo +echo === Preparing and starting VM with -drive === +echo + +TEST_IMG="${TEST_IMG}.base" _make_test_img $size +TEST_IMG="${TEST_IMG}.mid" _make_test_img -b "${TEST_IMG}.base" +_make_test_img -b "${TEST_IMG}.mid" +TEST_IMG="${TEST_IMG}.ovl2" _make_test_img -b "${TEST_IMG}.mid" +TEST_IMG="${TEST_IMG}.ovl3" _make_test_img -b "${TEST_IMG}.ovl2" + +$QEMU_IO -c 'write -P 0x55 1M 64k' "${TEST_IMG}.mid" | _filter_qemu_io + +qemu_comm_method="qmp" +qmp_pretty="y" + +_launch_qemu \ + -drive "driver=${IMGFMT},file=${TEST_IMG},node-name=top,backing.node-name=mid" \ + -drive "driver=${IMGFMT},file=${TEST_IMG}.ovl3,node-name=top2,backing.backing=mid" +h=$QEMU_HANDLE +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" '^}' + +echo +echo === Perform commit job === +echo + +_send_qemu_cmd $h \ + "{ 'execute': 'block-commit', + 'arguments': { 'job-id': 'commit0', + 'device': 'top', + 'base':'$TEST_IMG.base', + 'top': '$TEST_IMG.mid' } }" \ + "BLOCK_JOB_COMPLETED" +_send_qemu_cmd $h "" "^}" + +echo +echo === Check that both top and top2 point to base now === +echo + +_send_qemu_cmd $h "{ 'execute': 'query-named-block-nodes' }" "^}" | + _filter_generated_node_ids + +_send_qemu_cmd $h "{ 'execute': 'quit' }" "^}" +wait=1 _cleanup_qemu + +_img_info +TEST_IMG="$TEST_IMG.ovl2" _img_info + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/191.out b/tests/qemu-iotests/191.out new file mode 100644 index 0000000000..7bfcd2d5d8 --- /dev/null +++ b/tests/qemu-iotests/191.out @@ -0,0 +1,827 @@ +QA output created by 191 + +=== Preparing and starting VM === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT.mid', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.mid +Formatting 'TEST_DIR/t.IMGFMT.ovl2', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.mid +wrote 65536/65536 bytes at offset 1048576 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{ + "return": { + } +} + +=== Perform commit job === + +{ + "return": { + } +} +{ + "timestamp": { + "seconds": TIMESTAMP, + "microseconds": TIMESTAMP + }, + "event": "BLOCK_JOB_COMPLETED", + "data": { + "device": "commit0", + "len": 67108864, + "offset": 67108864, + "speed": 0, + "type": "commit" + } +} + +=== Check that both top and top2 point to base now === + +{ + "return": [ + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "backing-image": { + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.base", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 397312, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "dirty-flag": false + }, + "backing-filename-format": "qcow2", + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.ovl2", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 200704, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "full-backing-filename": "TEST_DIR/t.qcow2.base", + "backing-filename": "TEST_DIR/t.qcow2.base", + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "top2", + "backing_file_depth": 1, + "drv": "qcow2", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "backing_file": "TEST_DIR/t.qcow2.base", + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.ovl2", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.qcow2.ovl2", + "format": "file", + "actual-size": 200704, + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "NODE_NAME", + "backing_file_depth": 0, + "drv": "file", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.ovl2", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "backing-image": { + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.base", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 397312, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "dirty-flag": false + }, + "backing-filename-format": "qcow2", + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 200704, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "full-backing-filename": "TEST_DIR/t.qcow2.base", + "backing-filename": "TEST_DIR/t.qcow2.base", + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "top", + "backing_file_depth": 1, + "drv": "qcow2", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "backing_file": "TEST_DIR/t.qcow2.base", + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.qcow2", + "format": "file", + "actual-size": 200704, + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "NODE_NAME", + "backing_file_depth": 0, + "drv": "file", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "backing-image": { + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.base", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 397312, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "dirty-flag": false + }, + "backing-filename-format": "qcow2", + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.mid", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 397312, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "full-backing-filename": "TEST_DIR/t.qcow2.base", + "backing-filename": "TEST_DIR/t.qcow2.base", + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "mid", + "backing_file_depth": 1, + "drv": "qcow2", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "backing_file": "TEST_DIR/t.qcow2.base", + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.mid", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "virtual-size": 393216, + "filename": "TEST_DIR/t.qcow2.mid", + "format": "file", + "actual-size": 397312, + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "NODE_NAME", + "backing_file_depth": 0, + "drv": "file", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.mid", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.base", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 397312, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "base", + "backing_file_depth": 0, + "drv": "qcow2", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.base", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "virtual-size": 393216, + "filename": "TEST_DIR/t.qcow2.base", + "format": "file", + "actual-size": 397312, + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "NODE_NAME", + "backing_file_depth": 0, + "drv": "file", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.base", + "encryption_key_missing": false + } + ] +} +{ + "return": { + } +} +{ + "timestamp": { + "seconds": TIMESTAMP, + "microseconds": TIMESTAMP + }, + "event": "SHUTDOWN", + "data": { + "guest": false + } +} +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 64M (67108864 bytes) +cluster_size: 65536 +backing file: TEST_DIR/t.IMGFMT.base +backing file format: IMGFMT +image: TEST_DIR/t.IMGFMT.ovl2 +file format: IMGFMT +virtual size: 64M (67108864 bytes) +cluster_size: 65536 +backing file: TEST_DIR/t.IMGFMT.base +backing file format: IMGFMT + +=== Preparing and starting VM with -drive === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT.mid', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.mid +Formatting 'TEST_DIR/t.IMGFMT.ovl2', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.mid +Formatting 'TEST_DIR/t.IMGFMT.ovl3', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.ovl2 +wrote 65536/65536 bytes at offset 1048576 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{ + "return": { + } +} + +=== Perform commit job === + +{ + "return": { + } +} +{ + "timestamp": { + "seconds": TIMESTAMP, + "microseconds": TIMESTAMP + }, + "event": "BLOCK_JOB_COMPLETED", + "data": { + "device": "commit0", + "len": 67108864, + "offset": 67108864, + "speed": 0, + "type": "commit" + } +} + +=== Check that both top and top2 point to base now === + +{ + "return": [ + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "backing-image": { + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.base", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 397312, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "dirty-flag": false + }, + "backing-filename-format": "qcow2", + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.ovl2", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 200704, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "full-backing-filename": "TEST_DIR/t.qcow2.base", + "backing-filename": "TEST_DIR/t.qcow2.base", + "dirty-flag": false + }, + "iops_wr": 0, + "ro": true, + "node-name": "NODE_NAME", + "backing_file_depth": 1, + "drv": "qcow2", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "backing_file": "TEST_DIR/t.qcow2.base", + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.ovl2", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.qcow2.ovl2", + "format": "file", + "actual-size": 200704, + "dirty-flag": false + }, + "iops_wr": 0, + "ro": true, + "node-name": "NODE_NAME", + "backing_file_depth": 0, + "drv": "file", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.ovl2", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "backing-image": { + "backing-image": { + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.base", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 397312, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "dirty-flag": false + }, + "backing-filename-format": "qcow2", + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.ovl2", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 200704, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "full-backing-filename": "TEST_DIR/t.qcow2.base", + "backing-filename": "TEST_DIR/t.qcow2.base", + "dirty-flag": false + }, + "backing-filename-format": "qcow2", + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.ovl3", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 200704, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "full-backing-filename": "TEST_DIR/t.qcow2.ovl2", + "backing-filename": "TEST_DIR/t.qcow2.ovl2", + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "top2", + "backing_file_depth": 2, + "drv": "qcow2", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "backing_file": "TEST_DIR/t.qcow2.ovl2", + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.ovl3", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.qcow2.ovl3", + "format": "file", + "actual-size": 200704, + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "NODE_NAME", + "backing_file_depth": 0, + "drv": "file", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.ovl3", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.base", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 397312, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "dirty-flag": false + }, + "iops_wr": 0, + "ro": true, + "node-name": "NODE_NAME", + "backing_file_depth": 0, + "drv": "qcow2", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.base", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "virtual-size": 393216, + "filename": "TEST_DIR/t.qcow2.base", + "format": "file", + "actual-size": 397312, + "dirty-flag": false + }, + "iops_wr": 0, + "ro": true, + "node-name": "NODE_NAME", + "backing_file_depth": 0, + "drv": "file", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2.base", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "backing-image": { + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2.base", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 397312, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "dirty-flag": false + }, + "backing-filename-format": "qcow2", + "virtual-size": 67108864, + "filename": "TEST_DIR/t.qcow2", + "cluster-size": 65536, + "format": "qcow2", + "actual-size": 200704, + "format-specific": { + "type": "qcow2", + "data": { + "compat": "1.1", + "lazy-refcounts": false, + "refcount-bits": 16, + "corrupt": false + } + }, + "full-backing-filename": "TEST_DIR/t.qcow2.base", + "backing-filename": "TEST_DIR/t.qcow2.base", + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "top", + "backing_file_depth": 1, + "drv": "qcow2", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "backing_file": "TEST_DIR/t.qcow2.base", + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2", + "encryption_key_missing": false + }, + { + "iops_rd": 0, + "detect_zeroes": "off", + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.qcow2", + "format": "file", + "actual-size": 200704, + "dirty-flag": false + }, + "iops_wr": 0, + "ro": false, + "node-name": "NODE_NAME", + "backing_file_depth": 0, + "drv": "file", + "iops": 0, + "bps_wr": 0, + "write_threshold": 0, + "encrypted": false, + "bps": 0, + "bps_rd": 0, + "cache": { + "no-flush": false, + "direct": false, + "writeback": true + }, + "file": "TEST_DIR/t.qcow2", + "encryption_key_missing": false + } + ] +} +{ + "return": { + } +} +{ + "timestamp": { + "seconds": TIMESTAMP, + "microseconds": TIMESTAMP + }, + "event": "SHUTDOWN", + "data": { + "guest": false + } +} +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 64M (67108864 bytes) +cluster_size: 65536 +backing file: TEST_DIR/t.IMGFMT.base +backing file format: IMGFMT +image: TEST_DIR/t.IMGFMT.ovl2 +file format: IMGFMT +virtual size: 64M (67108864 bytes) +cluster_size: 65536 +backing file: TEST_DIR/t.IMGFMT.base +backing file format: IMGFMT +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index cdccee319e..595f4fd416 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -188,6 +188,7 @@ 188 rw auto quick 189 rw auto 190 rw auto quick +191 rw auto 192 rw auto quick 194 rw auto migration quick 195 rw auto quick From bde70715b67cc5183b00b445b811c1dfc0f74d2e Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 27 Jun 2017 20:36:18 +0200 Subject: [PATCH 37/54] commit: Remove overlay_bs We don't need to make any assumptions about the graph layout above the top node of the commit operation any more. Remove the use of bdrv_find_overlay() and related variables from the commit job code. bdrv_drop_intermediate() doesn't use the 'active' parameter any more, so we can just drop it. The overlay node was previously added to the block job to get a BLK_PERM_GRAPH_MOD. We really need to respect those permissions in bdrv_drop_intermediate() now, but as long as we haven't figured out yet how BLK_PERM_GRAPH_MOD is actually supposed to work, just leave a TODO comment there. With this change, it is now possible to perform another block job on an overlay node without conflicts. qemu-iotests 030 is changed accordingly. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 6 ++-- block/commit.c | 62 ++++++++++-------------------------------- include/block/block.h | 3 +- tests/qemu-iotests/030 | 4 --- 4 files changed, 20 insertions(+), 55 deletions(-) diff --git a/block.c b/block.c index 1b098d4d09..46eb1728da 100644 --- a/block.c +++ b/block.c @@ -3491,8 +3491,8 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs) * if active == top, that is considered an error * */ -int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, - BlockDriverState *base, const char *backing_file_str) +int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + const char *backing_file_str) { BdrvChild *c, *next; Error *local_err = NULL; @@ -3510,6 +3510,8 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, } /* success - we can delete the intermediate states, and link top->base */ + /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once + * we've figured out how they should work. */ backing_file_str = backing_file_str ? backing_file_str : base->filename; QLIST_FOREACH_SAFE(c, &top->parents, next_parent, next) { diff --git a/block/commit.c b/block/commit.c index 610e1cd8f5..5036eec434 100644 --- a/block/commit.c +++ b/block/commit.c @@ -36,13 +36,11 @@ enum { typedef struct CommitBlockJob { BlockJob common; RateLimit limit; - BlockDriverState *active; BlockDriverState *commit_top_bs; BlockBackend *top; BlockBackend *base; BlockdevOnError on_error; int base_flags; - int orig_overlay_flags; char *backing_file_str; } CommitBlockJob; @@ -81,18 +79,15 @@ static void commit_complete(BlockJob *job, void *opaque) { CommitBlockJob *s = container_of(job, CommitBlockJob, common); CommitCompleteData *data = opaque; - BlockDriverState *active = s->active; BlockDriverState *top = blk_bs(s->top); BlockDriverState *base = blk_bs(s->base); - BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs); + BlockDriverState *commit_top_bs = s->commit_top_bs; int ret = data->ret; bool remove_commit_top_bs = false; - /* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */ + /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ bdrv_ref(top); - if (overlay_bs) { - bdrv_ref(overlay_bs); - } + bdrv_ref(commit_top_bs); /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before * the normal backing chain can be restored. */ @@ -100,9 +95,9 @@ static void commit_complete(BlockJob *job, void *opaque) if (!block_job_is_cancelled(&s->common) && ret == 0) { /* success */ - ret = bdrv_drop_intermediate(active, s->commit_top_bs, base, + ret = bdrv_drop_intermediate(s->commit_top_bs, base, s->backing_file_str); - } else if (overlay_bs) { + } else { /* XXX Can (or should) we somehow keep 'consistent read' blocked even * after the failed/cancelled commit job is gone? If we already wrote * something to base, the intermediate images aren't valid any more. */ @@ -115,9 +110,6 @@ static void commit_complete(BlockJob *job, void *opaque) if (s->base_flags != bdrv_get_flags(base)) { bdrv_reopen(base, s->base_flags, NULL); } - if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { - bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); - } g_free(s->backing_file_str); blk_unref(s->top); @@ -134,10 +126,13 @@ static void commit_complete(BlockJob *job, void *opaque) * filter driver from the backing chain. Do this as the final step so that * the 'consistent read' permission can be granted. */ if (remove_commit_top_bs) { - bdrv_set_backing_hd(overlay_bs, top, &error_abort); + bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL, + &error_abort); + bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs), + &error_abort); } - bdrv_unref(overlay_bs); + bdrv_unref(commit_top_bs); bdrv_unref(top); } @@ -283,10 +278,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, { CommitBlockJob *s; BlockReopenQueue *reopen_queue = NULL; - int orig_overlay_flags; int orig_base_flags; BlockDriverState *iter; - BlockDriverState *overlay_bs; BlockDriverState *commit_top_bs = NULL; Error *local_err = NULL; int ret; @@ -297,31 +290,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, return; } - overlay_bs = bdrv_find_overlay(bs, top); - - if (overlay_bs == NULL) { - error_setg(errp, "Could not find overlay image for %s:", top->filename); - return; - } - s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL, speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { return; } - orig_base_flags = bdrv_get_flags(base); - orig_overlay_flags = bdrv_get_flags(overlay_bs); - - /* convert base & overlay_bs to r/w, if necessary */ + /* convert base to r/w, if necessary */ + orig_base_flags = bdrv_get_flags(base); if (!(orig_base_flags & BDRV_O_RDWR)) { reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, orig_base_flags | BDRV_O_RDWR); } - if (!(orig_overlay_flags & BDRV_O_RDWR)) { - reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL, - orig_overlay_flags | BDRV_O_RDWR); - } + if (reopen_queue) { bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); if (local_err != NULL) { @@ -382,14 +363,6 @@ void commit_start(const char *job_id, BlockDriverState *bs, goto fail; } - /* overlay_bs must be blocked because it needs to be modified to - * update the backing image string. */ - ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs, - BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp); - if (ret < 0) { - goto fail; - } - s->base = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, @@ -408,13 +381,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, goto fail; } - s->active = bs; - - s->base_flags = orig_base_flags; - s->orig_overlay_flags = orig_overlay_flags; - + s->base_flags = orig_base_flags; s->backing_file_str = g_strdup(backing_file_str); - s->on_error = on_error; trace_commit_start(bs, base, top, s); @@ -429,7 +397,7 @@ fail: blk_unref(s->top); } if (commit_top_bs) { - bdrv_set_backing_hd(overlay_bs, top, &error_abort); + bdrv_replace_node(commit_top_bs, top, &error_abort); } block_job_early_fail(&s->common); } diff --git a/include/block/block.h b/include/block/block.h index 3c3af462e4..d5c2731a03 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -315,8 +315,7 @@ int bdrv_commit(BlockDriverState *bs); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); void bdrv_register(BlockDriver *bdrv); -int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, - BlockDriverState *base, +int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, const char *backing_file_str); BlockDriverState *bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bs); diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index d745cb4cde..18838948fa 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -287,10 +287,6 @@ class TestParallelOps(iotests.QMPTestCase): result = self.vm.qmp('block-stream', device='node6', base=self.imgs[4], job_id='stream-node6-v2') self.assert_qmp(result, 'error/class', 'GenericError') - # This fails because block-commit needs to block node6, the overlay of the 'top' image - result = self.vm.qmp('block-stream', device='node7', base=self.imgs[5], job_id='stream-node6-v3') - self.assert_qmp(result, 'error/class', 'GenericError') - # This fails because block-commit currently blocks the active layer even if it's not used result = self.vm.qmp('block-stream', device='drive0', base=self.imgs[5], job_id='stream-drive0') self.assert_qmp(result, 'error/class', 'GenericError') From 0f40444cc4fc6526a9a544b11475f1086113f9ba Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 5 Oct 2017 14:02:43 -0500 Subject: [PATCH 38/54] qemu-io: Add -C for opening with copy-on-read Make it easier to enable copy-on-read during iotests, by exposing a new bool option to main and open. Signed-off-by: Eric Blake Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Reviewed-by: John Snow Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- qemu-io.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/qemu-io.c b/qemu-io.c index 265445ad89..c70bde3eb1 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -102,6 +102,7 @@ static void open_help(void) " Opens a file for subsequent use by all of the other qemu-io commands.\n" " -r, -- open file read-only\n" " -s, -- use snapshot file\n" +" -C, -- use copy-on-read\n" " -n, -- disable host cache, short for -t none\n" " -U, -- force shared permissions\n" " -k, -- use kernel AIO implementation (on Linux only)\n" @@ -120,7 +121,7 @@ static const cmdinfo_t open_cmd = { .argmin = 1, .argmax = -1, .flags = CMD_NOFILE_OK, - .args = "[-rsnkU] [-t cache] [-d discard] [-o options] [path]", + .args = "[-rsCnkU] [-t cache] [-d discard] [-o options] [path]", .oneline = "open the file specified by path", .help = open_help, }; @@ -145,7 +146,7 @@ static int open_f(BlockBackend *blk, int argc, char **argv) QDict *opts; bool force_share = false; - while ((c = getopt(argc, argv, "snro:kt:d:U")) != -1) { + while ((c = getopt(argc, argv, "snCro:kt:d:U")) != -1) { switch (c) { case 's': flags |= BDRV_O_SNAPSHOT; @@ -154,6 +155,9 @@ static int open_f(BlockBackend *blk, int argc, char **argv) flags |= BDRV_O_NOCACHE; writethrough = false; break; + case 'C': + flags |= BDRV_O_COPY_ON_READ; + break; case 'r': readonly = 1; break; @@ -251,6 +255,7 @@ static void usage(const char *name) " -r, --read-only export read-only\n" " -s, --snapshot use snapshot file\n" " -n, --nocache disable host cache, short for -t none\n" +" -C, --copy-on-read enable copy-on-read\n" " -m, --misalign misalign allocations for O_DIRECT\n" " -k, --native-aio use kernel AIO implementation (on Linux only)\n" " -t, --cache=MODE use the given cache mode for the image\n" @@ -439,7 +444,7 @@ static QemuOptsList file_opts = { int main(int argc, char **argv) { int readonly = 0; - const char *sopt = "hVc:d:f:rsnmkt:T:U"; + const char *sopt = "hVc:d:f:rsnCmkt:T:U"; const struct option lopt[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V' }, @@ -448,6 +453,7 @@ int main(int argc, char **argv) { "read-only", no_argument, NULL, 'r' }, { "snapshot", no_argument, NULL, 's' }, { "nocache", no_argument, NULL, 'n' }, + { "copy-on-read", no_argument, NULL, 'C' }, { "misalign", no_argument, NULL, 'm' }, { "native-aio", no_argument, NULL, 'k' }, { "discard", required_argument, NULL, 'd' }, @@ -492,6 +498,9 @@ int main(int argc, char **argv) flags |= BDRV_O_NOCACHE; writethrough = false; break; + case 'C': + flags |= BDRV_O_COPY_ON_READ; + break; case 'd': if (bdrv_parse_discard_flags(optarg, &flags) < 0) { error_report("Invalid discard option: %s", optarg); From 9cdcfd9f7afd0274919af95a164178ac6ee847ca Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 5 Oct 2017 14:02:44 -0500 Subject: [PATCH 39/54] block: Uniform handling of 0-length bdrv_get_block_status() Handle a 0-length block status request up front, with a uniform return value claiming the area is not allocated. Most callers don't pass a length of 0 to bdrv_get_block_status() and friends; but it definitely happens with a 0-length read when copy-on-read is enabled. While we could audit all callers to ensure that they never make a 0-length request, and then assert that fact, it was just as easy to fix things to always report success (as long as the callers are careful to not go into an infinite loop). However, we had inconsistent behavior on whether the status is reported as allocated or defers to the backing layer, depending on what callbacks the driver implements, and possibly wasting quite a few CPU cycles to get to that answer. Consistently reporting unallocated up front doesn't really hurt anything, and makes it easier both for callers (0-length requests now have well-defined behavior) and for drivers (drivers don't have to deal with 0-length requests). Signed-off-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/io.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/io.c b/block/io.c index e0f904583f..94f74703b7 100644 --- a/block/io.c +++ b/block/io.c @@ -1777,6 +1777,10 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, *pnum = 0; return BDRV_BLOCK_EOF; } + if (!nb_sectors) { + *pnum = 0; + return 0; + } n = total_sectors - sector_num; if (n < nb_sectors) { From 8803714b53243816e96a759de3cfd22625230023 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 5 Oct 2017 14:02:45 -0500 Subject: [PATCH 40/54] iotests: Restore stty settings on completion Executing qemu with a terminal as stdin will temporarily alter stty settings on that terminal (for example, disabling echo), because of how we run both the monitor and any multiplexing with guest input. Normally, qemu restores the original settings on exit; but if an iotest triggers qemu to abort in the middle, we can be left with the altered terminal setup. This can make life very annoying when debugging an iotest failure (not everyone remembers the trick of blind-typing 'stty sane' without echo, and some people prefer terminal settings that are slightly different than the defaults picked by 'stty sane'). It is possible to avoid qemu corrupting the terminal by not passing a terminal to qemu's stdin in the first place (as in, use './check ... Signed-off-by: Kevin Wolf --- tests/qemu-iotests/check | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 176cb8e937..e6b6ff7a04 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -134,6 +134,13 @@ export VALGRIND_QEMU= export IMGKEYSECRET= export IMGOPTSSYNTAX=false +# Save current tty settings, since an aborting qemu call may leave things +# screwed up +STTY_RESTORE= +if test -t 0; then + STTY_RESTORE=$(stty -g) +fi + for r do @@ -664,6 +671,9 @@ END { if (NR > 0) { needwrap=false fi + if test -n "$STTY_RESTORE"; then + stty $STTY_RESTORE + fi rm -f "${TEST_DIR}"/*.out "${TEST_DIR}"/*.err "${TEST_DIR}"/*.time rm -f "${TEST_DIR}"/check.pid "${TEST_DIR}"/check.sts rm -f $tmp.* From d855ebcd3cca4080a81aeec9c0a27af006734280 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 5 Oct 2017 14:02:46 -0500 Subject: [PATCH 41/54] block: Add blkdebug hook for copy-on-read Make it possible to inject errors on writes performed during a read operation due to copy-on-read semantics. Signed-off-by: Eric Blake Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Reviewed-by: John Snow Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/io.c | 1 + qapi/block-core.json | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/block/io.c b/block/io.c index 94f74703b7..a5598ed869 100644 --- a/block/io.c +++ b/block/io.c @@ -983,6 +983,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, goto err; } + bdrv_debug_event(bs, BLKDBG_COR_WRITE); if (drv->bdrv_co_pwrite_zeroes && buffer_is_zero(bounce_buffer, iov.iov_len)) { /* FIXME: Should we (perhaps conditionally) be setting diff --git a/qapi/block-core.json b/qapi/block-core.json index 750bb0c77c..ab96e348e6 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2538,6 +2538,8 @@ # # @l1_shrink_free_l2_clusters: discard the l2 tables. (since 2.11) # +# @cor_write: a write due to copy-on-read (since 2.11) +# # Since: 2.9 ## { 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG', @@ -2555,7 +2557,8 @@ 'flush_to_disk', 'pwritev_rmw_head', 'pwritev_rmw_after_head', 'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev', 'pwritev_zero', 'pwritev_done', 'empty_image_prepare', - 'l1_shrink_write_table', 'l1_shrink_free_l2_clusters' ] } + 'l1_shrink_write_table', 'l1_shrink_free_l2_clusters', + 'cor_write'] } ## # @BlkdebugInjectErrorOptions: From cb2e28780c7080af489e72227683fe374f05022d Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 5 Oct 2017 14:02:47 -0500 Subject: [PATCH 42/54] block: Perform copy-on-read in loop Improve our braindead copy-on-read implementation. Pre-patch, we have multiple issues: - we create a bounce buffer and perform a write for the entire request, even if the active image already has 99% of the clusters occupied, and really only needs to copy-on-read the remaining 1% of the clusters - our bounce buffer was as large as the read request, and can needlessly exhaust our memory by using double the memory of the request size (the original request plus our bounce buffer), rather than a capped maximum overhead beyond the original - if a driver has a max_transfer limit, we are bypassing the normal code in bdrv_aligned_preadv() that fragments to that limit, and instead attempt to read the entire buffer from the driver in one go, which some drivers may assert on - a client can request a large request of nearly 2G such that rounding the request out to cluster boundaries results in a byte count larger than 2G. While this cannot exceed 32 bits, it DOES have some follow-on problems: -- the call to bdrv_driver_pread() can assert for exceeding BDRV_REQUEST_MAX_BYTES, if the driver is old and lacks .bdrv_co_preadv -- if the buffer is all zeroes, the subsequent call to bdrv_co_do_pwrite_zeroes is a no-op due to a negative size, which means we did not actually copy on read Fix all of these issues by breaking up the action into a loop, where each iteration is capped to sane limits. Also, querying the allocation status allows us to optimize: when data is already present in the active layer, we don't need to bounce. Note that the code has a telling comment that copy-on-read should probably be a filter driver rather than a bolt-on hack in io.c; but that remains a task for another day. CC: qemu-stable@nongnu.org Signed-off-by: Eric Blake Reviewed-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/io.c | 124 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 84 insertions(+), 40 deletions(-) diff --git a/block/io.c b/block/io.c index a5598ed869..8e419070b5 100644 --- a/block/io.c +++ b/block/io.c @@ -34,6 +34,9 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ +/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ +#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) + static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags); @@ -945,11 +948,14 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, BlockDriver *drv = bs->drv; struct iovec iov; - QEMUIOVector bounce_qiov; + QEMUIOVector local_qiov; int64_t cluster_offset; unsigned int cluster_bytes; size_t skip_bytes; int ret; + int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, + BDRV_REQUEST_MAX_BYTES); + unsigned int progress = 0; /* FIXME We cannot require callers to have write permissions when all they * are doing is a read request. If we did things right, write permissions @@ -961,53 +967,95 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, // assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); /* Cover entire cluster so no additional backing file I/O is required when - * allocating cluster in the image file. + * allocating cluster in the image file. Note that this value may exceed + * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which + * is one reason we loop rather than doing it all at once. */ bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes); + skip_bytes = offset - cluster_offset; trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, cluster_offset, cluster_bytes); - iov.iov_len = cluster_bytes; - iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); + bounce_buffer = qemu_try_blockalign(bs, + MIN(MIN(max_transfer, cluster_bytes), + MAX_BOUNCE_BUFFER)); if (bounce_buffer == NULL) { ret = -ENOMEM; goto err; } - qemu_iovec_init_external(&bounce_qiov, &iov, 1); + while (cluster_bytes) { + int64_t pnum; - ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes, - &bounce_qiov, 0); - if (ret < 0) { - goto err; + ret = bdrv_is_allocated(bs, cluster_offset, + MIN(cluster_bytes, max_transfer), &pnum); + if (ret < 0) { + /* Safe to treat errors in querying allocation as if + * unallocated; we'll probably fail again soon on the + * read, but at least that will set a decent errno. + */ + pnum = MIN(cluster_bytes, max_transfer); + } + + assert(skip_bytes < pnum); + + if (ret <= 0) { + /* Must copy-on-read; use the bounce buffer */ + iov.iov_base = bounce_buffer; + iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER); + qemu_iovec_init_external(&local_qiov, &iov, 1); + + ret = bdrv_driver_preadv(bs, cluster_offset, pnum, + &local_qiov, 0); + if (ret < 0) { + goto err; + } + + bdrv_debug_event(bs, BLKDBG_COR_WRITE); + if (drv->bdrv_co_pwrite_zeroes && + buffer_is_zero(bounce_buffer, pnum)) { + /* FIXME: Should we (perhaps conditionally) be setting + * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy + * that still correctly reads as zero? */ + ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0); + } else { + /* This does not change the data on the disk, it is not + * necessary to flush even in cache=writethrough mode. + */ + ret = bdrv_driver_pwritev(bs, cluster_offset, pnum, + &local_qiov, 0); + } + + if (ret < 0) { + /* It might be okay to ignore write errors for guest + * requests. If this is a deliberate copy-on-read + * then we don't want to ignore the error. Simply + * report it in all cases. + */ + goto err; + } + + qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes, + pnum - skip_bytes); + } else { + /* Read directly into the destination */ + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes); + ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size, + &local_qiov, 0); + qemu_iovec_destroy(&local_qiov); + if (ret < 0) { + goto err; + } + } + + cluster_offset += pnum; + cluster_bytes -= pnum; + progress += pnum - skip_bytes; + skip_bytes = 0; } - - bdrv_debug_event(bs, BLKDBG_COR_WRITE); - if (drv->bdrv_co_pwrite_zeroes && - buffer_is_zero(bounce_buffer, iov.iov_len)) { - /* FIXME: Should we (perhaps conditionally) be setting - * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy - * that still correctly reads as zero? */ - ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0); - } else { - /* This does not change the data on the disk, it is not necessary - * to flush even in cache=writethrough mode. - */ - ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes, - &bounce_qiov, 0); - } - - if (ret < 0) { - /* It might be okay to ignore write errors for guest requests. If this - * is a deliberate copy-on-read then we don't want to ignore the error. - * Simply report it in all cases. - */ - goto err; - } - - skip_bytes = offset - cluster_offset; - qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes); + ret = 0; err: qemu_vfree(bounce_buffer); @@ -1213,9 +1261,6 @@ int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0); } -/* Maximum buffer for write zeroes fallback, in bytes */ -#define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) - static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags) { @@ -1230,8 +1275,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX); int alignment = MAX(bs->bl.pwrite_zeroes_alignment, bs->bl.request_alignment); - int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, - MAX_WRITE_ZEROES_BOUNCE_BUFFER); + int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER); assert(alignment % bs->bl.request_alignment == 0); head = offset % alignment; From 461743390d3a1ceafa4503811adbc87c7d372741 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 5 Oct 2017 14:02:48 -0500 Subject: [PATCH 43/54] iotests: Add test 197 for covering copy-on-read Add a test for qcow2 copy-on-read behavior, including exposure for the just-fixed bugs. The copy-on-read behavior is always to a qcow2 image, but the test is careful to allow running with most image protocol/format combos as the backing file being copied from (luks being the exception, as it is harder to pass the right secret to all the right places). In fact, for './check nbd', this appears to be the first time we've had a qcow2 image wrapping NBD, requiring an additional line in _filter_img_create to match the similar line in _filter_img_info. Invoking blkdebug to prove we don't write too much took some effort to get working; and it requires that $TEST_WRAP (based on $TEST_DIR) not be subject to word splitting. We may decide later to have the entire iotests suite use relative rather than absolute names, to avoid problems inherited by the absolute name of $PWD or $TEST_DIR, at which point the sanity check in this commit could be simplified. This test requires at least 2G of consecutive memory to succeed; as such, it is prone to spurious failures, particularly on 32-bit machines under load. This situation is detected and triggers an early exit to skip the test, rather than a failure. To manually provoke this setup on a beefier machine, I used: $ (ulimit -S -v 1000000; ./check -qcow2 197) Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/197 | 109 +++++++++++++++++++++++++++++++ tests/qemu-iotests/197.out | 26 ++++++++ tests/qemu-iotests/common.filter | 1 + tests/qemu-iotests/group | 1 + 4 files changed, 137 insertions(+) create mode 100755 tests/qemu-iotests/197 create mode 100644 tests/qemu-iotests/197.out diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197 new file mode 100755 index 0000000000..887eb4f496 --- /dev/null +++ b/tests/qemu-iotests/197 @@ -0,0 +1,109 @@ +#!/bin/bash +# +# Test case for copy-on-read into qcow2 +# +# Copyright (C) 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=eblake@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +status=1 # failure is the default! + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +TEST_WRAP="$TEST_DIR/t.wrap.qcow2" +BLKDBG_CONF="$TEST_DIR/blkdebug.conf" + +# Sanity check: our use of blkdebug fails if $TEST_DIR contains spaces +# or other problems +case "$TEST_DIR" in + *[^-_a-zA-Z0-9/]*) + _notrun "Suspicious TEST_DIR='$TEST_DIR', cowardly refusing to run" ;; +esac + +_cleanup() +{ + _cleanup_test_img + rm -f "$BLKDBG_CONF" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# Test is supported for any backing file; but we force qcow2 for our wrapper. +_supported_fmt generic +_supported_proto generic +_supported_os Linux +# LUKS support may be possible, but it complicates things. +_unsupported_fmt luks + +echo +echo '=== Copy-on-read ===' +echo + +# Prep the images +_make_test_img 4G +$QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io +IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \ + _make_test_img -F "$IMGFMT" -b "$TEST_IMG" | _filter_img_create +$QEMU_IO -f qcow2 -c "write -z -u 1M 64k" "$TEST_WRAP" | _filter_qemu_io + +# Ensure that a read of two clusters, but where one is already allocated, +# does not re-write the allocated cluster +cat > "$BLKDBG_CONF" <&1 | _filter_qemu_io) +case $output in + *allocate*) + _notrun "Insufficent memory to run test" ;; + *) printf '%s\n' "$output" ;; +esac +$QEMU_IO -f qcow2 -C -c "read -P 0 $((3*1024*1024*1024 + 1024)) 1k" \ + "$TEST_WRAP" | _filter_qemu_io + +# Copy-on-read is incompatible with read-only +$QEMU_IO -f qcow2 -C -r "$TEST_WRAP" 2>&1 | _filter_testdir + +# Break the backing chain, and show that images are identical, and that +# we properly copied over explicit zeros. +$QEMU_IMG rebase -u -b "" -f qcow2 "$TEST_WRAP" +$QEMU_IO -f qcow2 -c map "$TEST_WRAP" +_check_test_img +$QEMU_IMG compare -f $IMGFMT -F qcow2 "$TEST_IMG" "$TEST_WRAP" + +# success, all done +echo '*** done' +status=0 diff --git a/tests/qemu-iotests/197.out b/tests/qemu-iotests/197.out new file mode 100644 index 0000000000..52b4137d7b --- /dev/null +++ b/tests/qemu-iotests/197.out @@ -0,0 +1,26 @@ +QA output created by 197 + +=== Copy-on-read === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 +wrote 1024/1024 bytes at offset 3221225472 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.wrap.IMGFMT', fmt=IMGFMT size=4294967296 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT +wrote 65536/65536 bytes at offset 1048576 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 1048576 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 0/0 bytes at offset 0 +0 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2147483136/2147483136 bytes at offset 1024 +2 GiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 3221226496 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +can't open device TEST_DIR/t.wrap.qcow2: Can't use copy-on-read on read-only device +2 GiB (0x80010000) bytes allocated at offset 0 bytes (0x0) +1023.938 MiB (0x3fff0000) bytes not allocated at offset 2 GiB (0x80010000) +64 KiB (0x10000) bytes allocated at offset 3 GiB (0xc0000000) +1023.938 MiB (0x3fff0000) bytes not allocated at offset 3 GiB (0xc0010000) +No errors were found on the image. +Images are identical. +*** done diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 9d5442ecd9..227b37e941 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -111,6 +111,7 @@ _filter_img_create() sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -e "s#$TEST_DIR#TEST_DIR#g" \ -e "s#$IMGFMT#IMGFMT#g" \ + -e 's#nbd:127.0.0.1:10810#TEST_DIR/t.IMGFMT#g' \ -e "s# encryption=off##g" \ -e "s# cluster_size=[0-9]\\+##g" \ -e "s# table_size=[0-9]\\+##g" \ diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 595f4fd416..83da427c0a 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -192,3 +192,4 @@ 192 rw auto quick 194 rw auto migration quick 195 rw auto quick +197 rw auto quick From 161253e2d0a83a1b33bca019c6e926013e1a03db Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:35 +0100 Subject: [PATCH 44/54] block: use 1 MB bounce buffers for crypto instead of 16KB Using 16KB bounce buffers creates a significant performance penalty for I/O to encrypted volumes on storage which high I/O latency (rotating rust & network drives), because it triggers lots of fairly small I/O operations. On tests with rotating rust, and cache=none|directsync, write speed increased from 2MiB/s to 32MiB/s, on a par with that achieved by the in-kernel luks driver. With other cache modes the in-kernel driver is still notably faster because it is able to report completion of the I/O request before any encryption is done, while the in-QEMU driver must encrypt the data before completion. Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-2-berrange@redhat.com Reviewed-by: Eric Blake Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/crypto.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index 58ef6f2f52..684cabeaf8 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -379,7 +379,11 @@ static void block_crypto_close(BlockDriverState *bs) } -#define BLOCK_CRYPTO_MAX_SECTORS 32 +/* + * 1 MB bounce buffer gives good performance / memory tradeoff + * when using cache=none|directsync. + */ +#define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024) static coroutine_fn int block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, @@ -396,12 +400,11 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, qemu_iovec_init(&hd_qiov, qiov->niov); - /* Bounce buffer so we have a linear mem region for - * entire sector. XXX optimize so we avoid bounce - * buffer in case that qiov->niov == 1 + /* Bounce buffer because we don't wish to expose cipher text + * in qiov which points to guest memory. */ cipher_data = - qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512, + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE, qiov->size)); if (cipher_data == NULL) { ret = -ENOMEM; @@ -411,8 +414,8 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, while (remaining_sectors) { cur_nr_sectors = remaining_sectors; - if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) { - cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS; + if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { + cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); } qemu_iovec_reset(&hd_qiov); @@ -464,12 +467,11 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, qemu_iovec_init(&hd_qiov, qiov->niov); - /* Bounce buffer so we have a linear mem region for - * entire sector. XXX optimize so we avoid bounce - * buffer in case that qiov->niov == 1 + /* Bounce buffer because we're not permitted to touch + * contents of qiov - it points to guest memory. */ cipher_data = - qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512, + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE, qiov->size)); if (cipher_data == NULL) { ret = -ENOMEM; @@ -479,8 +481,8 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, while (remaining_sectors) { cur_nr_sectors = remaining_sectors; - if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) { - cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS; + if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { + cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); } qemu_iovec_to_buf(qiov, bytes_done, From 850f49de9b57511dcaf2cd7e45059f8f38fadf3b Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:36 +0100 Subject: [PATCH 45/54] crypto: expose encryption sector size in APIs While current encryption schemes all have a fixed sector size of 512 bytes, this is not guaranteed to be the case in future. Expose the sector size in the APIs so the block layer can remove assumptions about fixed 512 byte sectors. Reviewed-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-3-berrange@redhat.com Signed-off-by: Max Reitz --- crypto/block-luks.c | 6 ++++-- crypto/block-qcow.c | 1 + crypto/block.c | 6 ++++++ crypto/blockpriv.h | 1 + include/crypto/block.h | 15 +++++++++++++++ 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/crypto/block-luks.c b/crypto/block-luks.c index 36bc856084..a9062bb0f2 100644 --- a/crypto/block-luks.c +++ b/crypto/block-luks.c @@ -846,8 +846,9 @@ qcrypto_block_luks_open(QCryptoBlock *block, } } + block->sector_size = QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; block->payload_offset = luks->header.payload_offset * - QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; + block->sector_size; luks->cipher_alg = cipheralg; luks->cipher_mode = ciphermode; @@ -1240,8 +1241,9 @@ qcrypto_block_luks_create(QCryptoBlock *block, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)) * QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS); + block->sector_size = QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; block->payload_offset = luks->header.payload_offset * - QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; + block->sector_size; /* Reserve header space to match payload offset */ initfunc(block, block->payload_offset, opaque, &local_err); diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c index a456fe338b..4dd594a9ba 100644 --- a/crypto/block-qcow.c +++ b/crypto/block-qcow.c @@ -80,6 +80,7 @@ qcrypto_block_qcow_init(QCryptoBlock *block, goto fail; } + block->sector_size = QCRYPTO_BLOCK_QCOW_SECTOR_SIZE; block->payload_offset = 0; return 0; diff --git a/crypto/block.c b/crypto/block.c index c382393d9a..a7a9ad240e 100644 --- a/crypto/block.c +++ b/crypto/block.c @@ -170,6 +170,12 @@ uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block) } +uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block) +{ + return block->sector_size; +} + + void qcrypto_block_free(QCryptoBlock *block) { if (!block) { diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h index 0edb810e22..d227522d88 100644 --- a/crypto/blockpriv.h +++ b/crypto/blockpriv.h @@ -36,6 +36,7 @@ struct QCryptoBlock { QCryptoHashAlgorithm kdfhash; size_t niv; uint64_t payload_offset; /* In bytes */ + uint64_t sector_size; /* In bytes */ }; struct QCryptoBlockDriver { diff --git a/include/crypto/block.h b/include/crypto/block.h index f0e543bee1..13232b2472 100644 --- a/include/crypto/block.h +++ b/include/crypto/block.h @@ -240,6 +240,21 @@ QCryptoHashAlgorithm qcrypto_block_get_kdf_hash(QCryptoBlock *block); */ uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block); +/** + * qcrypto_block_get_sector_size: + * @block: the block encryption object + * + * Get the size of sectors used for payload encryption. A new + * IV is used at the start of each sector. The encryption + * sector size is not required to match the sector size of the + * underlying storage. For example LUKS will always use a 512 + * byte sector size, even if the volume is on a disk with 4k + * sectors. + * + * Returns: the sector in bytes + */ +uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block); + /** * qcrypto_block_free: * @block: the block encryption object From 31376555c7b447afb1bf9084eacbb8f566ff6b9d Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:37 +0100 Subject: [PATCH 46/54] block: fix data type casting for crypto payload offset The crypto APIs report the offset of the data payload as an uint64_t type, but the block driver is casting to size_t or ssize_t which will potentially truncate. Most of the block APIs use int64_t for offsets meanwhile, so even if using uint64_t in the crypto block driver we are still at risk of truncation. Change the block crypto driver to use uint64_t, but add asserts that the value is less than INT64_MAX. Reviewed-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-4-berrange@redhat.com Signed-off-by: Max Reitz --- block/crypto.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index 684cabeaf8..61f5d77bc0 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -364,8 +364,9 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, PreallocMode prealloc, Error **errp) { BlockCrypto *crypto = bs->opaque; - size_t payload_offset = + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + assert(payload_offset < (INT64_MAX - offset)); offset += payload_offset; @@ -395,8 +396,9 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - size_t payload_offset = + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block) / 512; + assert(payload_offset < (INT64_MAX / 512)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -462,8 +464,9 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - size_t payload_offset = + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block) / 512; + assert(payload_offset < (INT64_MAX / 512)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -524,7 +527,9 @@ static int64_t block_crypto_getlength(BlockDriverState *bs) BlockCrypto *crypto = bs->opaque; int64_t len = bdrv_getlength(bs->file->bs); - ssize_t offset = qcrypto_block_get_payload_offset(crypto->block); + uint64_t offset = qcrypto_block_get_payload_offset(crypto->block); + assert(offset < INT64_MAX); + assert(offset < len); len -= offset; From a73466fbad6d48ba356940474cd72da602373304 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:38 +0100 Subject: [PATCH 47/54] block: convert crypto driver to bdrv_co_preadv|pwritev Make the crypto driver implement the bdrv_co_preadv|pwritev callbacks, and also use bdrv_co_preadv|pwritev for I/O with the protocol driver beneath. This replaces sector based I/O with byte based I/O, and allows us to stop assuming the physical sector size matches the encryption sector size. Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-5-berrange@redhat.com Reviewed-by: Eric Blake Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/crypto.c | 106 +++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index 61f5d77bc0..965c173b01 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -387,18 +387,23 @@ static void block_crypto_close(BlockDriverState *bs) #define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024) static coroutine_fn int -block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, - int remaining_sectors, QEMUIOVector *qiov) +block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { BlockCrypto *crypto = bs->opaque; - int cur_nr_sectors; /* number of sectors in current iteration */ + uint64_t cur_bytes; /* number of bytes in current iteration */ uint64_t bytes_done = 0; uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - uint64_t payload_offset = - qcrypto_block_get_payload_offset(crypto->block) / 512; - assert(payload_offset < (INT64_MAX / 512)); + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + uint64_t sector_num = offset / sector_size; + + assert(!flags); + assert(payload_offset < INT64_MAX); + assert(QEMU_IS_ALIGNED(offset, sector_size)); + assert(QEMU_IS_ALIGNED(bytes, sector_size)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -413,37 +418,29 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, goto cleanup; } - while (remaining_sectors) { - cur_nr_sectors = remaining_sectors; - - if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { - cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); - } + while (bytes) { + cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE); qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512); + qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); - ret = bdrv_co_readv(bs->file, - payload_offset + sector_num, - cur_nr_sectors, &hd_qiov); + ret = bdrv_co_preadv(bs->file, payload_offset + offset + bytes_done, + cur_bytes, &hd_qiov, 0); if (ret < 0) { goto cleanup; } - if (qcrypto_block_decrypt(crypto->block, - sector_num, - cipher_data, cur_nr_sectors * 512, - NULL) < 0) { + if (qcrypto_block_decrypt(crypto->block, sector_num, cipher_data, + cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } - qemu_iovec_from_buf(qiov, bytes_done, - cipher_data, cur_nr_sectors * 512); + qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes); - remaining_sectors -= cur_nr_sectors; - sector_num += cur_nr_sectors; - bytes_done += cur_nr_sectors * 512; + sector_num += cur_bytes / sector_size; + bytes -= cur_bytes; + bytes_done += cur_bytes; } cleanup: @@ -455,18 +452,23 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, static coroutine_fn int -block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, - int remaining_sectors, QEMUIOVector *qiov) +block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { BlockCrypto *crypto = bs->opaque; - int cur_nr_sectors; /* number of sectors in current iteration */ + uint64_t cur_bytes; /* number of bytes in current iteration */ uint64_t bytes_done = 0; uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - uint64_t payload_offset = - qcrypto_block_get_payload_offset(crypto->block) / 512; - assert(payload_offset < (INT64_MAX / 512)); + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + uint64_t sector_num = offset / sector_size; + + assert(!flags); + assert(payload_offset < INT64_MAX); + assert(QEMU_IS_ALIGNED(offset, sector_size)); + assert(QEMU_IS_ALIGNED(bytes, sector_size)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -481,37 +483,29 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, goto cleanup; } - while (remaining_sectors) { - cur_nr_sectors = remaining_sectors; + while (bytes) { + cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE); - if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { - cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); - } + qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes); - qemu_iovec_to_buf(qiov, bytes_done, - cipher_data, cur_nr_sectors * 512); - - if (qcrypto_block_encrypt(crypto->block, - sector_num, - cipher_data, cur_nr_sectors * 512, - NULL) < 0) { + if (qcrypto_block_encrypt(crypto->block, sector_num, cipher_data, + cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512); + qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); - ret = bdrv_co_writev(bs->file, - payload_offset + sector_num, - cur_nr_sectors, &hd_qiov); + ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done, + cur_bytes, &hd_qiov, 0); if (ret < 0) { goto cleanup; } - remaining_sectors -= cur_nr_sectors; - sector_num += cur_nr_sectors; - bytes_done += cur_nr_sectors * 512; + sector_num += cur_bytes / sector_size; + bytes -= cur_bytes; + bytes_done += cur_bytes; } cleanup: @@ -521,6 +515,13 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, return ret; } +static void block_crypto_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + bs->bl.request_alignment = sector_size; /* No sub-sector I/O */ +} + static int64_t block_crypto_getlength(BlockDriverState *bs) { @@ -620,8 +621,9 @@ BlockDriver bdrv_crypto_luks = { .bdrv_truncate = block_crypto_truncate, .create_opts = &block_crypto_create_opts_luks, - .bdrv_co_readv = block_crypto_co_readv, - .bdrv_co_writev = block_crypto_co_writev, + .bdrv_refresh_limits = block_crypto_refresh_limits, + .bdrv_co_preadv = block_crypto_co_preadv, + .bdrv_co_pwritev = block_crypto_co_pwritev, .bdrv_getlength = block_crypto_getlength, .bdrv_get_info = block_crypto_get_info_luks, .bdrv_get_specific_info = block_crypto_get_specific_info_luks, From 4609742a495d98ac358098e10d91890185dcdc60 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:39 +0100 Subject: [PATCH 48/54] block: convert qcrypto_block_encrypt|decrypt to take bytes offset Instead of sector offset, take the bytes offset when encrypting or decrypting data. Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-6-berrange@redhat.com Reviewed-by: Eric Blake Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/crypto.c | 12 ++++-------- block/qcow.c | 11 +++++++---- block/qcow2-cluster.c | 8 +++----- block/qcow2.c | 4 ++-- crypto/block-luks.c | 12 ++++++++---- crypto/block-qcow.c | 12 ++++++++---- crypto/block.c | 20 ++++++++++++++------ crypto/blockpriv.h | 4 ++-- include/crypto/block.h | 14 ++++++++------ 9 files changed, 56 insertions(+), 41 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index 965c173b01..edf53d49d1 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -398,7 +398,6 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, int ret = 0; uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - uint64_t sector_num = offset / sector_size; assert(!flags); assert(payload_offset < INT64_MAX); @@ -430,15 +429,14 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, goto cleanup; } - if (qcrypto_block_decrypt(crypto->block, sector_num, cipher_data, - cur_bytes, NULL) < 0) { + if (qcrypto_block_decrypt(crypto->block, offset + bytes_done, + cipher_data, cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes); - sector_num += cur_bytes / sector_size; bytes -= cur_bytes; bytes_done += cur_bytes; } @@ -463,7 +461,6 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, int ret = 0; uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - uint64_t sector_num = offset / sector_size; assert(!flags); assert(payload_offset < INT64_MAX); @@ -488,8 +485,8 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes); - if (qcrypto_block_encrypt(crypto->block, sector_num, cipher_data, - cur_bytes, NULL) < 0) { + if (qcrypto_block_encrypt(crypto->block, offset + bytes_done, + cipher_data, cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } @@ -503,7 +500,6 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, goto cleanup; } - sector_num += cur_bytes / sector_size; bytes -= cur_bytes; bytes_done += cur_bytes; } diff --git a/block/qcow.c b/block/qcow.c index f450b00cfc..9569deeaf0 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -478,7 +478,9 @@ static int get_cluster_offset(BlockDriverState *bs, for(i = 0; i < s->cluster_sectors; i++) { if (i < n_start || i >= n_end) { memset(s->cluster_data, 0x00, 512); - if (qcrypto_block_encrypt(s->crypto, start_sect + i, + if (qcrypto_block_encrypt(s->crypto, + (start_sect + i) * + BDRV_SECTOR_SIZE, s->cluster_data, BDRV_SECTOR_SIZE, NULL) < 0) { @@ -668,7 +670,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, } if (bs->encrypted) { assert(s->crypto); - if (qcrypto_block_decrypt(s->crypto, sector_num, buf, + if (qcrypto_block_decrypt(s->crypto, + sector_num * BDRV_SECTOR_SIZE, buf, n * BDRV_SECTOR_SIZE, NULL) < 0) { ret = -EIO; break; @@ -740,8 +743,8 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, } if (bs->encrypted) { assert(s->crypto); - if (qcrypto_block_encrypt(s->crypto, sector_num, buf, - n * BDRV_SECTOR_SIZE, NULL) < 0) { + if (qcrypto_block_encrypt(s->crypto, sector_num * BDRV_SECTOR_SIZE, + buf, n * BDRV_SECTOR_SIZE, NULL) < 0) { ret = -EIO; break; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index d2518d1893..0e5aec81cb 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -446,15 +446,13 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, { if (bytes && bs->encrypted) { BDRVQcow2State *s = bs->opaque; - int64_t sector = (s->crypt_physical_offset ? + int64_t offset = (s->crypt_physical_offset ? (cluster_offset + offset_in_cluster) : - (src_cluster_offset + offset_in_cluster)) - >> BDRV_SECTOR_BITS; + (src_cluster_offset + offset_in_cluster)); assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); assert((bytes & ~BDRV_SECTOR_MASK) == 0); assert(s->crypto); - if (qcrypto_block_encrypt(s->crypto, sector, buffer, - bytes, NULL) < 0) { + if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) { return false; } } diff --git a/block/qcow2.c b/block/qcow2.c index b8da8ca105..33597394b5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1811,7 +1811,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, if (qcrypto_block_decrypt(s->crypto, (s->crypt_physical_offset ? cluster_offset + offset_in_cluster : - offset) >> BDRV_SECTOR_BITS, + offset), cluster_data, cur_bytes, NULL) < 0) { @@ -1946,7 +1946,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, if (qcrypto_block_encrypt(s->crypto, (s->crypt_physical_offset ? cluster_offset + offset_in_cluster : - offset) >> BDRV_SECTOR_BITS, + offset), cluster_data, cur_bytes, NULL) < 0) { ret = -EIO; diff --git a/crypto/block-luks.c b/crypto/block-luks.c index a9062bb0f2..d418ac30b8 100644 --- a/crypto/block-luks.c +++ b/crypto/block-luks.c @@ -1399,29 +1399,33 @@ static void qcrypto_block_luks_cleanup(QCryptoBlock *block) static int qcrypto_block_luks_decrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); return qcrypto_block_decrypt_helper(block->cipher, block->niv, block->ivgen, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, - startsector, buf, len, errp); + offset, buf, len, errp); } static int qcrypto_block_luks_encrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); return qcrypto_block_encrypt_helper(block->cipher, block->niv, block->ivgen, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, - startsector, buf, len, errp); + offset, buf, len, errp); } diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c index 4dd594a9ba..8817d6aaa7 100644 --- a/crypto/block-qcow.c +++ b/crypto/block-qcow.c @@ -143,29 +143,33 @@ qcrypto_block_qcow_cleanup(QCryptoBlock *block) static int qcrypto_block_qcow_decrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); return qcrypto_block_decrypt_helper(block->cipher, block->niv, block->ivgen, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE, - startsector, buf, len, errp); + offset, buf, len, errp); } static int qcrypto_block_qcow_encrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); return qcrypto_block_encrypt_helper(block->cipher, block->niv, block->ivgen, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE, - startsector, buf, len, errp); + offset, buf, len, errp); } diff --git a/crypto/block.c b/crypto/block.c index a7a9ad240e..f206d5eea8 100644 --- a/crypto/block.c +++ b/crypto/block.c @@ -127,22 +127,22 @@ QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block, int qcrypto_block_decrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { - return block->driver->decrypt(block, startsector, buf, len, errp); + return block->driver->decrypt(block, offset, buf, len, errp); } int qcrypto_block_encrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { - return block->driver->encrypt(block, startsector, buf, len, errp); + return block->driver->encrypt(block, offset, buf, len, errp); } @@ -194,13 +194,17 @@ int qcrypto_block_decrypt_helper(QCryptoCipher *cipher, size_t niv, QCryptoIVGen *ivgen, int sectorsize, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { uint8_t *iv; int ret = -1; + uint64_t startsector = offset / sectorsize; + + assert(QEMU_IS_ALIGNED(offset, sectorsize)); + assert(QEMU_IS_ALIGNED(len, sectorsize)); iv = niv ? g_new0(uint8_t, niv) : NULL; @@ -243,13 +247,17 @@ int qcrypto_block_encrypt_helper(QCryptoCipher *cipher, size_t niv, QCryptoIVGen *ivgen, int sectorsize, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { uint8_t *iv; int ret = -1; + uint64_t startsector = offset / sectorsize; + + assert(QEMU_IS_ALIGNED(offset, sectorsize)); + assert(QEMU_IS_ALIGNED(len, sectorsize)); iv = niv ? g_new0(uint8_t, niv) : NULL; diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h index d227522d88..41840abcec 100644 --- a/crypto/blockpriv.h +++ b/crypto/blockpriv.h @@ -82,7 +82,7 @@ int qcrypto_block_decrypt_helper(QCryptoCipher *cipher, size_t niv, QCryptoIVGen *ivgen, int sectorsize, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp); @@ -91,7 +91,7 @@ int qcrypto_block_encrypt_helper(QCryptoCipher *cipher, size_t niv, QCryptoIVGen *ivgen, int sectorsize, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp); diff --git a/include/crypto/block.h b/include/crypto/block.h index 13232b2472..cd18f46d56 100644 --- a/include/crypto/block.h +++ b/include/crypto/block.h @@ -161,18 +161,19 @@ QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block, /** * @qcrypto_block_decrypt: * @block: the block encryption object - * @startsector: the sector from which @buf was read + * @offset: the position at which @iov was read * @buf: the buffer to decrypt * @len: the length of @buf in bytes * @errp: pointer to a NULL-initialized error object * * Decrypt @len bytes of cipher text in @buf, writing - * plain text back into @buf + * plain text back into @buf. @len and @offset must be + * a multiple of the encryption format sector size. * * Returns 0 on success, -1 on failure */ int qcrypto_block_decrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp); @@ -180,18 +181,19 @@ int qcrypto_block_decrypt(QCryptoBlock *block, /** * @qcrypto_block_encrypt: * @block: the block encryption object - * @startsector: the sector to which @buf will be written + * @offset: the position at which @iov will be written * @buf: the buffer to decrypt * @len: the length of @buf in bytes * @errp: pointer to a NULL-initialized error object * * Encrypt @len bytes of plain text in @buf, writing - * cipher text back into @buf + * cipher text back into @buf. @len and @offset must be + * a multiple of the encryption format sector size. * * Returns 0 on success, -1 on failure */ int qcrypto_block_encrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp); From d67a6b09b4ac27a4fac07544ded79b40d2717a0d Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:40 +0100 Subject: [PATCH 49/54] block: support passthrough of BDRV_REQ_FUA in crypto driver The BDRV_REQ_FUA flag can trivially be allowed in the crypt driver as a passthrough to the underlying block driver. Reviewed-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-7-berrange@redhat.com Signed-off-by: Max Reitz --- block/crypto.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index edf53d49d1..60ddf8623e 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -279,6 +279,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, return -EINVAL; } + bs->supported_write_flags = BDRV_REQ_FUA & + bs->file->bs->supported_write_flags; + opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { @@ -462,7 +465,7 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - assert(!flags); + assert(!(flags & ~BDRV_REQ_FUA)); assert(payload_offset < INT64_MAX); assert(QEMU_IS_ALIGNED(offset, sector_size)); assert(QEMU_IS_ALIGNED(bytes, sector_size)); @@ -495,7 +498,7 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done, - cur_bytes, &hd_qiov, 0); + cur_bytes, &hd_qiov, flags); if (ret < 0) { goto cleanup; } From 18775ff32697ab6e1fd47989673bf1de54d0d942 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 28 Sep 2017 15:03:00 +0300 Subject: [PATCH 50/54] block/mirror: check backing in bdrv_mirror_top_refresh_filename Backing may be zero after failed bdrv_attach_child in bdrv_set_backing_hd, which leads to SIGSEGV. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-id: 20170928120300.58164-1-vsementsov@virtuozzo.com Reviewed-by: John Snow Signed-off-by: Max Reitz --- block/mirror.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/mirror.c b/block/mirror.c index 459b80f8f3..3b6f0c5772 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1058,6 +1058,11 @@ static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts) { + if (bs->backing == NULL) { + /* we can be here after failed bdrv_attach_child in + * bdrv_set_backing_hd */ + return; + } bdrv_refresh_filename(bs->backing->bs); pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->backing->bs->filename); From 47500c6775813c8f2b5a5de04d84222f3cecc62d Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 27 Sep 2017 23:13:34 +0200 Subject: [PATCH 51/54] iotests: Fix 195 if IMGFMT is part of TEST_DIR do_run_qemu() in iotest 195 first applies _filter_imgfmt when printing qemu's command line and _filter_testdir only afterwards. Therefore, if the image format is part of the test directory path, _filter_testdir will no longer apply and the actual output will differ from the reference output even in case of success. For example, TEST_DIR might be "/tmp/test-qcow2", in which case _filter_imgfmt first transforms this to "/tmp/test-IMGFMT" which is no longer recognized as the TEST_DIR by _filter_testdir. Fix this by not applying _filter_imgfmt in do_run_qemu() but in run_qemu() instead, and only after _filter_testdir. Signed-off-by: Max Reitz Message-id: 20170927211334.3988-1-mreitz@redhat.com Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- tests/qemu-iotests/195 | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/195 b/tests/qemu-iotests/195 index 05a239cbf5..e7a403ded2 100755 --- a/tests/qemu-iotests/195 +++ b/tests/qemu-iotests/195 @@ -44,15 +44,16 @@ _supported_os Linux function do_run_qemu() { - echo Testing: "$@" | _filter_imgfmt + echo Testing: "$@" $QEMU -nographic -qmp-pretty stdio -serial none "$@" echo } function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp \ - | _filter_qemu_io | _filter_generated_node_ids + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_imgfmt | _filter_qemu \ + | _filter_qmp | _filter_qemu_io \ + | _filter_generated_node_ids } size=64M From 76a2a30a99c670e9ec1b4a5d976868059c6bc258 Mon Sep 17 00:00:00 2001 From: Pavel Butsykin Date: Fri, 29 Sep 2017 15:16:12 +0300 Subject: [PATCH 52/54] qcow2: fix return error code in qcow2_truncate() Signed-off-by: Pavel Butsykin Reviewed-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Max Reitz Message-id: 20170929121613.25997-2-pbutsykin@virtuozzo.com Signed-off-by: Max Reitz --- block/qcow2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 33597394b5..960b3ab977 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3167,7 +3167,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (old_file_size < 0) { error_setg_errno(errp, -old_file_size, "Failed to inquire current file length"); - return ret; + return old_file_size; } nb_new_data_clusters = DIV_ROUND_UP(offset - old_length, @@ -3196,7 +3196,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (allocation_start < 0) { error_setg_errno(errp, -allocation_start, "Failed to resize refcount structures"); - return -allocation_start; + return allocation_start; } clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, From 163bc39d2c2921430e5c23f4d0a0966d62f67a02 Mon Sep 17 00:00:00 2001 From: Pavel Butsykin Date: Fri, 29 Sep 2017 15:16:13 +0300 Subject: [PATCH 53/54] qcow2: truncate the tail of the image file after shrinking the image Now after shrinking the image, at the end of the image file, there might be a tail that probably will never be used. So we can find the last used cluster and cut the tail. Signed-off-by: Pavel Butsykin Reviewed-by: John Snow Message-id: 20170929121613.25997-3-pbutsykin@virtuozzo.com Signed-off-by: Max Reitz --- block/qcow2-refcount.c | 22 ++++++++++++++++++++++ block/qcow2.c | 23 +++++++++++++++++++++++ block/qcow2.h | 1 + 3 files changed, 46 insertions(+) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 88d5a3f1ad..aa3fd6cf17 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -3181,3 +3181,25 @@ out: g_free(reftable_tmp); return ret; } + +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size) +{ + BDRVQcow2State *s = bs->opaque; + int64_t i; + + for (i = size_to_clusters(s, size) - 1; i >= 0; i--) { + uint64_t refcount; + int ret = qcow2_get_refcount(bs, i, &refcount); + if (ret < 0) { + fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", + i, strerror(-ret)); + return ret; + } + if (refcount > 0) { + return i; + } + } + qcow2_signal_corruption(bs, true, -1, -1, + "There are no references in the refcount table."); + return -EIO; +} diff --git a/block/qcow2.c b/block/qcow2.c index 960b3ab977..f63d1831f8 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3107,6 +3107,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, new_l1_size = size_to_l1(s, offset); if (offset < old_length) { + int64_t last_cluster, old_file_size; if (prealloc != PREALLOC_MODE_OFF) { error_setg(errp, "Preallocation can't be used for shrinking an image"); @@ -3135,6 +3136,28 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, "Failed to discard unused refblocks"); return ret; } + + old_file_size = bdrv_getlength(bs->file->bs); + if (old_file_size < 0) { + error_setg_errno(errp, -old_file_size, + "Failed to inquire current file length"); + return old_file_size; + } + last_cluster = qcow2_get_last_cluster(bs, old_file_size); + if (last_cluster < 0) { + error_setg_errno(errp, -last_cluster, + "Failed to find the last cluster"); + return last_cluster; + } + if ((last_cluster + 1) * s->cluster_size < old_file_size) { + ret = bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size, + PREALLOC_MODE_OFF, NULL); + if (ret < 0) { + warn_report("Failed to truncate the tail of the image: %s", + strerror(-ret)); + ret = 0; + } + } } else { ret = qcow2_grow_l1_table(bs, new_l1_size, true); if (ret < 0) { diff --git a/block/qcow2.h b/block/qcow2.h index 5a289a81e2..782a206ecb 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -597,6 +597,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, BlockDriverAmendStatusCB *status_cb, void *cb_opaque, Error **errp); int qcow2_shrink_reftable(BlockDriverState *bs); +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, From ce960aa9062a407d0ca15aee3dcd3bd84a4e24f9 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 29 Sep 2017 18:22:55 +0300 Subject: [PATCH 54/54] block/mirror: check backing in bdrv_mirror_top_flush Backing may be zero after failed bdrv_append in mirror_start_job, which leads to SIGSEGV. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-id: 20170929152255.5431-1-vsementsov@virtuozzo.com Signed-off-by: Max Reitz --- block/mirror.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/mirror.c b/block/mirror.c index 3b6f0c5772..153758ca9f 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1041,6 +1041,10 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) { + if (bs->backing == NULL) { + /* we can be here after failed bdrv_append in mirror_start_job */ + return 0; + } return bdrv_co_flush(bs->backing->bs); }