From 86ec252c198a62b3a1da3ac62277adf917d985cf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Oct 2016 18:35:26 +0200 Subject: [PATCH 01/13] quorum: change child_iter to children_read This simplifies a bit the code by using the usual C "inclusive start, exclusive end" pattern for ranges. Signed-off-by: Paolo Bonzini Message-id: 1475685327-22767-2-git-send-email-pbonzini@redhat.com Reviewed-by: Max Reitz Reviewed-by: Alberto Garcia Signed-off-by: Max Reitz --- block/quorum.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 9cf876fb34..435296eef6 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -130,7 +130,7 @@ struct QuorumAIOCB { bool is_read; int vote_ret; - int child_iter; /* which child to read in fifo pattern */ + int children_read; /* how many children have been read from */ }; static bool quorum_vote(QuorumAIOCB *acb); @@ -165,8 +165,7 @@ static void quorum_aio_finalize(QuorumAIOCB *acb) acb->common.cb(acb->common.opaque, ret); if (acb->is_read) { - /* on the quorum case acb->child_iter == s->num_children - 1 */ - for (i = 0; i <= acb->child_iter; i++) { + for (i = 0; i < acb->children_read; i++) { qemu_vfree(acb->qcrs[i].buf); qemu_iovec_destroy(&acb->qcrs[i].qiov); } @@ -301,14 +300,13 @@ static void quorum_aio_cb(void *opaque, int ret) if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) { /* We try to read next child in FIFO order if we fail to read */ - if (ret < 0 && (acb->child_iter + 1) < s->num_children) { - acb->child_iter++; + if (ret < 0 && acb->children_read < s->num_children) { read_fifo_child(acb); return; } if (ret == 0) { - quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->child_iter].qiov); + quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->children_read - 1].qiov); } acb->vote_ret = ret; quorum_aio_finalize(acb); @@ -653,6 +651,7 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) BDRVQuorumState *s = acb->common.bs->opaque; int i; + acb->children_read = s->num_children; for (i = 0; i < s->num_children; i++) { acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size); qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov); @@ -671,16 +670,14 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->common.bs->opaque; + int n = acb->children_read++; - acb->qcrs[acb->child_iter].buf = - qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size); - qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov); - qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov, - acb->qcrs[acb->child_iter].buf); - acb->qcrs[acb->child_iter].aiocb = - bdrv_aio_readv(s->children[acb->child_iter], acb->sector_num, - &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, - quorum_aio_cb, &acb->qcrs[acb->child_iter]); + acb->qcrs[n].buf = qemu_blockalign(s->children[n]->bs, acb->qiov->size); + qemu_iovec_init(&acb->qcrs[n].qiov, acb->qiov->niov); + qemu_iovec_clone(&acb->qcrs[n].qiov, acb->qiov, acb->qcrs[n].buf); + acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num, + &acb->qcrs[n].qiov, acb->nb_sectors, + quorum_aio_cb, &acb->qcrs[n]); return &acb->common; } @@ -696,13 +693,12 @@ static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs, QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors, cb, opaque); acb->is_read = true; + acb->children_read = 0; if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { - acb->child_iter = 
s->num_children - 1; return read_quorum_children(acb); } - acb->child_iter = 0; return read_fifo_child(acb); } From 1ba7e159787d7c96a8783584395b670c22bba4ef Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Oct 2016 18:35:27 +0200 Subject: [PATCH 02/13] quorum: do not allocate multiple iovecs for FIFO strategy In FIFO mode there are no parallel reads, hence there is no need to allocate separate buffers and clone the iovecs. The two cases of quorum_aio_cb are now even more different, and most of quorum_aio_finalize is only needed in one of them, so split them in separate functions. Signed-off-by: Paolo Bonzini Message-id: 1475685327-22767-3-git-send-email-pbonzini@redhat.com Reviewed-by: Max Reitz Reviewed-by: Alberto Garcia Signed-off-by: Max Reitz --- block/quorum.c | 83 +++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 435296eef6..d122299352 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -156,21 +156,7 @@ static AIOCBInfo quorum_aiocb_info = { static void quorum_aio_finalize(QuorumAIOCB *acb) { - int i, ret = 0; - - if (acb->vote_ret) { - ret = acb->vote_ret; - } - - acb->common.cb(acb->common.opaque, ret); - - if (acb->is_read) { - for (i = 0; i < acb->children_read; i++) { - qemu_vfree(acb->qcrs[i].buf); - qemu_iovec_destroy(&acb->qcrs[i].qiov); - } - } - + acb->common.cb(acb->common.opaque, acb->vote_ret); g_free(acb->qcrs); qemu_aio_unref(acb); } @@ -282,38 +268,52 @@ static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) } } +static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret) +{ + QuorumAIOCB *acb = sacb->parent; + QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; + quorum_report_bad(type, acb->sector_num, acb->nb_sectors, + sacb->aiocb->bs->node_name, ret); +} + +static void quorum_fifo_aio_cb(void *opaque, int ret) +{ + QuorumChildRequest *sacb = opaque; + QuorumAIOCB *acb = sacb->parent; + BDRVQuorumState *s = acb->common.bs->opaque; + + assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO); + + if (ret < 0) { + quorum_report_bad_acb(sacb, ret); + + /* We try to read next child in FIFO order if we fail to read */ + if (acb->children_read < s->num_children) { + read_fifo_child(acb); + return; + } + } + + acb->vote_ret = ret; + + /* FIXME: rewrite failed children if acb->children_read > 1? */ + quorum_aio_finalize(acb); +} + static void quorum_aio_cb(void *opaque, int ret) { QuorumChildRequest *sacb = opaque; QuorumAIOCB *acb = sacb->parent; BDRVQuorumState *s = acb->common.bs->opaque; bool rewrite = false; + int i; + sacb->ret = ret; if (ret == 0) { acb->success_count++; } else { - QuorumOpType type; - type = acb->is_read ? 
QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; - quorum_report_bad(type, acb->sector_num, acb->nb_sectors, - sacb->aiocb->bs->node_name, ret); + quorum_report_bad_acb(sacb, ret); } - - if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) { - /* We try to read next child in FIFO order if we fail to read */ - if (ret < 0 && acb->children_read < s->num_children) { - read_fifo_child(acb); - return; - } - - if (ret == 0) { - quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->children_read - 1].qiov); - } - acb->vote_ret = ret; - quorum_aio_finalize(acb); - return; - } - - sacb->ret = ret; acb->count++; assert(acb->count <= s->num_children); assert(acb->success_count <= s->num_children); @@ -324,6 +324,10 @@ static void quorum_aio_cb(void *opaque, int ret) /* Do the vote on read */ if (acb->is_read) { rewrite = quorum_vote(acb); + for (i = 0; i < s->num_children; i++) { + qemu_vfree(acb->qcrs[i].buf); + qemu_iovec_destroy(&acb->qcrs[i].qiov); + } } else { quorum_has_too_much_io_failed(acb); } @@ -672,12 +676,9 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) BDRVQuorumState *s = acb->common.bs->opaque; int n = acb->children_read++; - acb->qcrs[n].buf = qemu_blockalign(s->children[n]->bs, acb->qiov->size); - qemu_iovec_init(&acb->qcrs[n].qiov, acb->qiov->niov); - qemu_iovec_clone(&acb->qcrs[n].qiov, acb->qiov, acb->qcrs[n].buf); acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num, - &acb->qcrs[n].qiov, acb->nb_sectors, - quorum_aio_cb, &acb->qcrs[n]); + acb->qiov, acb->nb_sectors, + quorum_fifo_aio_cb, &acb->qcrs[n]); return &acb->common; } From dc162c8e4f088b08575460cca35b042d58c141aa Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:21 -0400 Subject: [PATCH 03/13] block: Hide HBitmap in block dirty bitmap interface HBitmap is an implementation detail of block dirty bitmap that should be hidden from users. Introduce a BdrvDirtyBitmapIter to encapsulate the underlying HBitmapIter. A small difference in the interface is, before, an HBitmapIter is initialized in place, now the new BdrvDirtyBitmapIter must be dynamically allocated because the structure definition is in block/dirty-bitmap.c. Two current users are converted too. 
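As a rough sketch of the new calling convention (illustrative only, not lifted verbatim from either converted user), a caller that used to embed an HBitmapIter now allocates and releases the opaque iterator:

    BdrvDirtyBitmapIter *iter = bdrv_dirty_iter_new(bitmap, 0);
    int64_t sector;

    /* bdrv_dirty_iter_next() returns -1 once no dirty sector is left */
    while ((sector = bdrv_dirty_iter_next(iter)) != -1) {
        /* process the dirty sector ... */
    }
    bdrv_dirty_iter_free(iter);

The bitmap also counts its active iterators, so releasing or truncating a dirty bitmap while an iterator is still outstanding now trips an assertion.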
Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-2-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/backup.c | 14 +++++++------ block/dirty-bitmap.c | 39 ++++++++++++++++++++++++++++++------ block/mirror.c | 24 ++++++++++++---------- include/block/dirty-bitmap.h | 7 +++++-- include/qemu/typedefs.h | 1 + 5 files changed, 60 insertions(+), 25 deletions(-) diff --git a/block/backup.c b/block/backup.c index 582bd0f7ee..02dbe48035 100644 --- a/block/backup.c +++ b/block/backup.c @@ -372,14 +372,14 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) int64_t end; int64_t last_cluster = -1; int64_t sectors_per_cluster = cluster_size_sectors(job); - HBitmapIter hbi; + BdrvDirtyBitmapIter *dbi; granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap); clusters_per_iter = MAX((granularity / job->cluster_size), 1); - bdrv_dirty_iter_init(job->sync_bitmap, &hbi); + dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0); /* Find the next dirty sector(s) */ - while ((sector = hbitmap_iter_next(&hbi)) != -1) { + while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { cluster = sector / sectors_per_cluster; /* Fake progress updates for any clusters we skipped */ @@ -391,7 +391,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) for (end = cluster + clusters_per_iter; cluster < end; cluster++) { do { if (yield_and_check(job)) { - return ret; + goto out; } ret = backup_do_cow(job, cluster * sectors_per_cluster, sectors_per_cluster, &error_is_read, @@ -399,7 +399,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) if ((ret < 0) && backup_error_action(job, error_is_read, -ret) == BLOCK_ERROR_ACTION_REPORT) { - return ret; + goto out; } } while (ret < 0); } @@ -407,7 +407,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) /* If the bitmap granularity is smaller than the backup granularity, * we need to advance the iterator pointer to the next cluster. 
*/ if (granularity < job->cluster_size) { - bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster); + bdrv_set_dirty_iter(dbi, cluster * sectors_per_cluster); } last_cluster = cluster - 1; @@ -419,6 +419,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) job->common.offset += ((end - last_cluster - 1) * job->cluster_size); } +out: + bdrv_dirty_iter_free(dbi); return ret; } diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index f2bfdcfdea..c572dfa941 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -42,9 +42,15 @@ struct BdrvDirtyBitmap { char *name; /* Optional non-empty unique ID */ int64_t size; /* Size of the bitmap (Number of sectors) */ bool disabled; /* Bitmap is read-only */ + int active_iterators; /* How many iterators are active */ QLIST_ENTRY(BdrvDirtyBitmap) list; }; +struct BdrvDirtyBitmapIter { + HBitmapIter hbi; + BdrvDirtyBitmap *bitmap; +}; + BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) { BdrvDirtyBitmap *bm; @@ -212,6 +218,7 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); + assert(!bitmap->active_iterators); hbitmap_truncate(bitmap->bitmap, size); bitmap->size = size; } @@ -224,6 +231,7 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bm, *next; QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { + assert(!bm->active_iterators); assert(!bdrv_dirty_bitmap_frozen(bm)); QLIST_REMOVE(bm, list); hbitmap_free(bm->bitmap); @@ -320,9 +328,29 @@ uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, + uint64_t first_sector) { - hbitmap_iter_init(hbi, bitmap->bitmap, 0); + BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1); + hbitmap_iter_init(&iter->hbi, bitmap->bitmap, first_sector); + iter->bitmap = bitmap; + bitmap->active_iterators++; + return iter; +} + +void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) +{ + if (!iter) { + return; + } + assert(iter->bitmap->active_iterators > 0); + iter->bitmap->active_iterators--; + g_free(iter); +} + +int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) +{ + return hbitmap_iter_next(&iter->hbi); } void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, @@ -373,12 +401,11 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, } /** - * Advance an HBitmapIter to an arbitrary offset. + * Advance a BdrvDirtyBitmapIter to an arbitrary offset. 
*/ -void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t sector_num) { - assert(hbi->hb); - hbitmap_iter_init(hbi, hbi->hb, offset); + hbitmap_iter_init(&iter->hbi, iter->hbi.hb, sector_num); } int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) diff --git a/block/mirror.c b/block/mirror.c index f9d1fecaa0..a433e6848c 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -55,7 +55,7 @@ typedef struct MirrorBlockJob { int64_t bdev_length; unsigned long *cow_bitmap; BdrvDirtyBitmap *dirty_bitmap; - HBitmapIter hbi; + BdrvDirtyBitmapIter *dbi; uint8_t *buf; QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; int buf_free_count; @@ -330,10 +330,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT, MAX_IO_SECTORS); - sector_num = hbitmap_iter_next(&s->hbi); + sector_num = bdrv_dirty_iter_next(s->dbi); if (sector_num < 0) { - bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); - sector_num = hbitmap_iter_next(&s->hbi); + bdrv_set_dirty_iter(s->dbi, 0); + sector_num = bdrv_dirty_iter_next(s->dbi); trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); assert(sector_num >= 0); } @@ -349,7 +349,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) /* Find the number of consective dirty chunks following the first dirty * one, and wait for in flight requests in them. */ while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) { - int64_t hbitmap_next; + int64_t next_dirty; int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk; int64_t next_chunk = next_sector / sectors_per_chunk; if (next_sector >= end || @@ -360,13 +360,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) break; } - hbitmap_next = hbitmap_iter_next(&s->hbi); - if (hbitmap_next > next_sector || hbitmap_next < 0) { + next_dirty = bdrv_dirty_iter_next(s->dbi); + if (next_dirty > next_sector || next_dirty < 0) { /* The bitmap iterator's cache is stale, refresh it */ - bdrv_set_dirty_iter(&s->hbi, next_sector); - hbitmap_next = hbitmap_iter_next(&s->hbi); + bdrv_set_dirty_iter(s->dbi, next_sector); + next_dirty = bdrv_dirty_iter_next(s->dbi); } - assert(hbitmap_next == next_sector); + assert(next_dirty == next_sector); nb_chunks++; } @@ -679,7 +679,8 @@ static void coroutine_fn mirror_run(void *opaque) } } - bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); + assert(!s->dbi); + s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap, 0); for (;;) { uint64_t delay_ns = 0; int64_t cnt, delta; @@ -793,6 +794,7 @@ immediate_exit: qemu_vfree(s->buf); g_free(s->cow_bitmap); g_free(s->in_flight_bitmap); + bdrv_dirty_iter_free(s->dbi); bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); data = g_malloc(sizeof(*data)); diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index ee3388f90d..0ef927d6ce 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -36,8 +36,11 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); -void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, + uint64_t first_sector); +void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); +int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); 
+void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index b113fcf156..1b8c30a7a0 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -11,6 +11,7 @@ typedef struct AioContext AioContext; typedef struct AllwinnerAHCIState AllwinnerAHCIState; typedef struct AudioState AudioState; typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; +typedef struct BdrvDirtyBitmapIter BdrvDirtyBitmapIter; typedef struct BlockBackend BlockBackend; typedef struct BlockBackendRootState BlockBackendRootState; typedef struct BlockDriverState BlockDriverState; From 07ac4cdb570c9cf6aed0338414f4f09fee0b549c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:22 -0400 Subject: [PATCH 04/13] HBitmap: Introduce "meta" bitmap to track bit changes Upon each bit toggle, the corresponding bit in the meta bitmap will be set. Signed-off-by: Fam Zheng [Amended text inline. --js] Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-3-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- include/qemu/hbitmap.h | 21 +++++++++++++ util/hbitmap.c | 69 +++++++++++++++++++++++++++++++++--------- 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index 8ab721e5aa..17259191b6 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -178,6 +178,27 @@ void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first); */ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi); +/* hbitmap_create_meta: + * Create a "meta" hbitmap to track dirtiness of the bits in this HBitmap. + * The caller owns the created bitmap and must call hbitmap_free_meta(hb) to + * free it. + * + * Currently, we only guarantee that if a bit in the hbitmap is changed it + * will be reflected in the meta bitmap, but we do not yet guarantee the + * opposite. + * + * @hb: The HBitmap to operate on. + * @chunk_size: How many bits in @hb does one bit in the meta track. + */ +HBitmap *hbitmap_create_meta(HBitmap *hb, int chunk_size); + +/* hbitmap_free_meta: + * Free the meta bitmap of @hb. + * + * @hb: The HBitmap whose meta bitmap should be freed. + */ +void hbitmap_free_meta(HBitmap *hb); + /** * hbitmap_iter_next: * @hbi: HBitmapIter to operate on. diff --git a/util/hbitmap.c b/util/hbitmap.c index 99fd2ba37b..f303975371 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -78,6 +78,9 @@ struct HBitmap { */ int granularity; + /* A meta dirty bitmap to track the dirtiness of bits in this HBitmap. */ + HBitmap *meta; + /* A number of progressively less coarse bitmaps (i.e. level 0 is the * coarsest). Each bit in level N represents a word in level N+1 that * has a set bit, except the last level where each bit represents the @@ -209,25 +212,27 @@ static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last) } /* Setting starts at the last layer and propagates up if an element - * changes from zero to non-zero. + * changes. 
*/ static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last) { unsigned long mask; - bool changed; + unsigned long old; assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL)); assert(start <= last); mask = 2UL << (last & (BITS_PER_LONG - 1)); mask -= 1UL << (start & (BITS_PER_LONG - 1)); - changed = (*elem == 0); + old = *elem; *elem |= mask; - return changed; + return old != *elem; } -/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ -static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... + * Returns true if at least one bit is changed. */ +static bool hb_set_between(HBitmap *hb, int level, uint64_t start, + uint64_t last) { size_t pos = start >> BITS_PER_LEVEL; size_t lastpos = last >> BITS_PER_LEVEL; @@ -256,23 +261,28 @@ static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last if (level > 0 && changed) { hb_set_between(hb, level - 1, pos, lastpos); } + return changed; } void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) { /* Compute range in the last layer. */ + uint64_t first, n; uint64_t last = start + count - 1; trace_hbitmap_set(hb, start, count, start >> hb->granularity, last >> hb->granularity); - start >>= hb->granularity; + first = start >> hb->granularity; last >>= hb->granularity; - count = last - start + 1; assert(last < hb->size); + n = last - first + 1; - hb->count += count - hb_count_between(hb, start, last); - hb_set_between(hb, HBITMAP_LEVELS - 1, start, last); + hb->count += n - hb_count_between(hb, first, last); + if (hb_set_between(hb, HBITMAP_LEVELS - 1, first, last) && + hb->meta) { + hbitmap_set(hb->meta, start, count); + } } /* Resetting works the other way round: propagate up if the new @@ -293,8 +303,10 @@ static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t l return blanked; } -/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ -static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... + * Returns true if at least one bit is changed. */ +static bool hb_reset_between(HBitmap *hb, int level, uint64_t start, + uint64_t last) { size_t pos = start >> BITS_PER_LEVEL; size_t lastpos = last >> BITS_PER_LEVEL; @@ -337,22 +349,29 @@ static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t la if (level > 0 && changed) { hb_reset_between(hb, level - 1, pos, lastpos); } + + return changed; + } void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) { /* Compute range in the last layer. 
*/ + uint64_t first; uint64_t last = start + count - 1; trace_hbitmap_reset(hb, start, count, start >> hb->granularity, last >> hb->granularity); - start >>= hb->granularity; + first = start >> hb->granularity; last >>= hb->granularity; assert(last < hb->size); - hb->count -= hb_count_between(hb, start, last); - hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last); + hb->count -= hb_count_between(hb, first, last); + if (hb_reset_between(hb, HBITMAP_LEVELS - 1, first, last) && + hb->meta) { + hbitmap_set(hb->meta, start, count); + } } void hbitmap_reset_all(HBitmap *hb) @@ -381,6 +400,7 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item) void hbitmap_free(HBitmap *hb) { unsigned i; + assert(!hb->meta); for (i = HBITMAP_LEVELS; i-- > 0; ) { g_free(hb->levels[i]); } @@ -458,6 +478,9 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size) (size - old) * sizeof(*hb->levels[i])); } } + if (hb->meta) { + hbitmap_truncate(hb->meta, hb->size << hb->granularity); + } } @@ -493,3 +516,19 @@ bool hbitmap_merge(HBitmap *a, const HBitmap *b) return true; } + +HBitmap *hbitmap_create_meta(HBitmap *hb, int chunk_size) +{ + assert(!(chunk_size & (chunk_size - 1))); + assert(!hb->meta); + hb->meta = hbitmap_alloc(hb->size << hb->granularity, + hb->granularity + ctz32(chunk_size)); + return hb->meta; +} + +void hbitmap_free_meta(HBitmap *hb) +{ + assert(hb->meta); + hbitmap_free(hb->meta); + hb->meta = NULL; +} From 4b62818a4f3075fce1e9335d91beef737a8fc9d3 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:23 -0400 Subject: [PATCH 05/13] tests: Add test code for meta bitmap Signed-off-by: Fam Zheng Reviewed-by: John Snow Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-4-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- tests/test-hbitmap.c | 116 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c index c0e9895fb9..e3abde1aae 100644 --- a/tests/test-hbitmap.c +++ b/tests/test-hbitmap.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "qemu/hbitmap.h" +#include "block/block.h" #define LOG_BITS_PER_LONG (BITS_PER_LONG == 32 ? 
5 : 6) @@ -20,6 +21,7 @@ typedef struct TestHBitmapData { HBitmap *hb; + HBitmap *meta; unsigned long *bits; size_t size; size_t old_size; @@ -91,6 +93,14 @@ static void hbitmap_test_init(TestHBitmapData *data, } } +static void hbitmap_test_init_meta(TestHBitmapData *data, + uint64_t size, int granularity, + int meta_chunk) +{ + hbitmap_test_init(data, size, granularity); + data->meta = hbitmap_create_meta(data->hb, meta_chunk); +} + static inline size_t hbitmap_test_array_size(size_t bits) { size_t n = DIV_ROUND_UP(bits, BITS_PER_LONG); @@ -133,6 +143,9 @@ static void hbitmap_test_teardown(TestHBitmapData *data, const void *unused) { if (data->hb) { + if (data->meta) { + hbitmap_free_meta(data->hb); + } hbitmap_free(data->hb); data->hb = NULL; } @@ -634,6 +647,103 @@ static void test_hbitmap_truncate_shrink_large(TestHBitmapData *data, hbitmap_test_truncate(data, size, -diff, 0); } +static void hbitmap_check_meta(TestHBitmapData *data, + int64_t start, int count) +{ + int64_t i; + + for (i = 0; i < data->size; i++) { + if (i >= start && i < start + count) { + g_assert(hbitmap_get(data->meta, i)); + } else { + g_assert(!hbitmap_get(data->meta, i)); + } + } +} + +static void hbitmap_test_meta(TestHBitmapData *data, + int64_t start, int count, + int64_t check_start, int check_count) +{ + hbitmap_reset_all(data->hb); + hbitmap_reset_all(data->meta); + + /* Test "unset" -> "unset" will not update meta. */ + hbitmap_reset(data->hb, start, count); + hbitmap_check_meta(data, 0, 0); + + /* Test "unset" -> "set" will update meta */ + hbitmap_set(data->hb, start, count); + hbitmap_check_meta(data, check_start, check_count); + + /* Test "set" -> "set" will not update meta */ + hbitmap_reset_all(data->meta); + hbitmap_set(data->hb, start, count); + hbitmap_check_meta(data, 0, 0); + + /* Test "set" -> "unset" will update meta */ + hbitmap_reset_all(data->meta); + hbitmap_reset(data->hb, start, count); + hbitmap_check_meta(data, check_start, check_count); +} + +static void hbitmap_test_meta_do(TestHBitmapData *data, int chunk_size) +{ + uint64_t size = chunk_size * 100; + hbitmap_test_init_meta(data, size, 0, chunk_size); + + hbitmap_test_meta(data, 0, 1, 0, chunk_size); + hbitmap_test_meta(data, 0, chunk_size, 0, chunk_size); + hbitmap_test_meta(data, chunk_size - 1, 1, 0, chunk_size); + hbitmap_test_meta(data, chunk_size - 1, 2, 0, chunk_size * 2); + hbitmap_test_meta(data, chunk_size - 1, chunk_size + 1, 0, chunk_size * 2); + hbitmap_test_meta(data, chunk_size - 1, chunk_size + 2, 0, chunk_size * 3); + hbitmap_test_meta(data, 7 * chunk_size - 1, chunk_size + 2, + 6 * chunk_size, chunk_size * 3); + hbitmap_test_meta(data, size - 1, 1, size - chunk_size, chunk_size); + hbitmap_test_meta(data, 0, size, 0, size); +} + +static void test_hbitmap_meta_byte(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_meta_do(data, BITS_PER_BYTE); +} + +static void test_hbitmap_meta_word(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_meta_do(data, BITS_PER_LONG); +} + +static void test_hbitmap_meta_sector(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_meta_do(data, BDRV_SECTOR_SIZE * BITS_PER_BYTE); +} + +/** + * Create an HBitmap and test set/unset. 
+ */ +static void test_hbitmap_meta_one(TestHBitmapData *data, const void *unused) +{ + int i; + int64_t offsets[] = { + 0, 1, L1 - 1, L1, L1 + 1, L2 - 1, L2, L2 + 1, L3 - 1, L3, L3 + 1 + }; + + hbitmap_test_init_meta(data, L3 * 2, 0, 1); + for (i = 0; i < ARRAY_SIZE(offsets); i++) { + hbitmap_test_meta(data, offsets[i], 1, offsets[i], 1); + hbitmap_test_meta(data, offsets[i], L1, offsets[i], L1); + hbitmap_test_meta(data, offsets[i], L2, offsets[i], L2); + } +} + +static void test_hbitmap_meta_zero(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_init_meta(data, 0, 0, 1); + + hbitmap_check_meta(data, 0, 0); +} + static void hbitmap_test_add(const char *testpath, void (*test_func)(TestHBitmapData *data, const void *user_data)) { @@ -683,6 +793,12 @@ int main(int argc, char **argv) test_hbitmap_truncate_grow_large); hbitmap_test_add("/hbitmap/truncate/shrink/large", test_hbitmap_truncate_shrink_large); + + hbitmap_test_add("/hbitmap/meta/zero", test_hbitmap_meta_zero); + hbitmap_test_add("/hbitmap/meta/one", test_hbitmap_meta_one); + hbitmap_test_add("/hbitmap/meta/byte", test_hbitmap_meta_byte); + hbitmap_test_add("/hbitmap/meta/word", test_hbitmap_meta_word); + hbitmap_test_add("/hbitmap/meta/sector", test_hbitmap_meta_sector); g_test_run(); return 0; From fb933437de327a9a322d4c2473d0c23aa8d9a267 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:24 -0400 Subject: [PATCH 06/13] block: Support meta dirty bitmap The added group of operations enables tracking of the changed bits in the dirty bitmap. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-5-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 52 ++++++++++++++++++++++++++++++++++++ include/block/dirty-bitmap.h | 9 +++++++ 2 files changed, 61 insertions(+) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index c572dfa941..9c6febbcb7 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -38,6 +38,7 @@ */ struct BdrvDirtyBitmap { HBitmap *bitmap; /* Dirty sector bitmap implementation */ + HBitmap *meta; /* Meta dirty bitmap */ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ char *name; /* Optional non-empty unique ID */ int64_t size; /* Size of the bitmap (Number of sectors) */ @@ -103,6 +104,56 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, return bitmap; } +/* bdrv_create_meta_dirty_bitmap + * + * Create a meta dirty bitmap that tracks the changes of bits in @bitmap. I.e. + * when a dirty status bit in @bitmap is changed (either from reset to set or + * the other way around), its respective meta dirty bitmap bit will be marked + * dirty as well. + * + * @bitmap: the block dirty bitmap for which to create a meta dirty bitmap. + * @chunk_size: how many bytes of bitmap data does each bit in the meta bitmap + * track. 
+ */ +void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int chunk_size) +{ + assert(!bitmap->meta); + bitmap->meta = hbitmap_create_meta(bitmap->bitmap, + chunk_size * BITS_PER_BYTE); +} + +void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(bitmap->meta); + hbitmap_free_meta(bitmap->bitmap); + bitmap->meta = NULL; +} + +int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors) +{ + uint64_t i; + int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta); + + /* To optimize: we can make hbitmap to internally check the range in a + * coarse level, or at least do it word by word. */ + for (i = sector; i < sector + nb_sectors; i += sectors_per_bit) { + if (hbitmap_get(bitmap->meta, i)) { + return true; + } + } + return false; +} + +void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors) +{ + hbitmap_reset(bitmap->meta, sector, nb_sectors); +} + bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) { return bitmap->successor; @@ -233,6 +284,7 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { assert(!bm->active_iterators); assert(!bdrv_dirty_bitmap_frozen(bm)); + assert(!bm->meta); QLIST_REMOVE(bm, list); hbitmap_free(bm->bitmap); g_free(bm->name); diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 0ef927d6ce..69c500ba2f 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -8,6 +8,9 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, uint32_t granularity, const char *name, Error **errp); +void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int chunk_size); +void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap); int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, Error **errp); @@ -36,6 +39,12 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); +int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors); +void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors); BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector); void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); From 15891fac7dd0300f94cca081cfef51b074cb988b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:25 -0400 Subject: [PATCH 07/13] block: Add two dirty bitmap getters For dirty bitmap users to get the size and the name of a BdrvDirtyBitmap. 
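A minimal illustration (the eventual migration user is not part of this patch, and the variable names are placeholders):

    const char *name = bdrv_dirty_bitmap_name(bitmap);
    int64_t nb_sectors = bdrv_dirty_bitmap_size(bitmap);

Both are simple accessors; the size is reported in sectors, matching the bitmap's internal size field.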
Signed-off-by: Fam Zheng Reviewed-by: John Snow Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-6-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 10 ++++++++++ include/block/dirty-bitmap.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 9c6febbcb7..860acc9b5b 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -154,6 +154,16 @@ void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, hbitmap_reset(bitmap->meta, sector, nb_sectors); } +int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->size; +} + +const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->name; +} + bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) { return bitmap->successor; diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 69c500ba2f..c4e7858a62 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -32,6 +32,8 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); +const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap); +int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap); DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); From 7105007a5c8ae6228d78487e1e382d53863ab2c9 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:26 -0400 Subject: [PATCH 08/13] block: Assert that bdrv_release_dirty_bitmap succeeded We use a loop over bs->dirty_bitmaps to make sure the caller is only releasing a bitmap owned by bs. Let's also assert that in this case the caller is releasing a bitmap that does exist. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-7-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 860acc9b5b..31d5296ef7 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -305,6 +305,9 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, } } } + if (bitmap) { + abort(); + } } void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) From 8258888e222ff03bf791d81a0001cdc40b878de4 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 13 Oct 2016 17:58:27 -0400 Subject: [PATCH 09/13] hbitmap: serialization Functions to serialize / deserialize(restore) HBitmap. HBitmap should be saved to linear sequence of bits independently of endianness and bitmap array element (unsigned long) size. Therefore Little Endian is chosen. These functions are appropriate for dirty bitmap migration, restoring the bitmap in several steps is available. To save performance, every step writes only the last level of the bitmap. All other levels are restored by hbitmap_deserialize_finish() as a last step of restoring. So, HBitmap is inconsistent while restoring. Signed-off-by: Vladimir Sementsov-Ogievskiy [Fix left shift operand to 1UL; add "finish" parameter. 
- Fam] Signed-off-by: Fam Zheng Signed-off-by: John Snow Message-id: 1476395910-8697-8-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- include/qemu/hbitmap.h | 79 ++++++++++++++++++++++++ util/hbitmap.c | 137 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 216 insertions(+) diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index 17259191b6..eb464759d5 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -145,6 +145,85 @@ void hbitmap_reset_all(HBitmap *hb); */ bool hbitmap_get(const HBitmap *hb, uint64_t item); +/** + * hbitmap_serialization_granularity: + * @hb: HBitmap to operate on. + * + * Granularity of serialization chunks, used by other serialization functions. + * For every chunk: + * 1. Chunk start should be aligned to this granularity. + * 2. Chunk size should be aligned too, except for last chunk (for which + * start + count == hb->size) + */ +uint64_t hbitmap_serialization_granularity(const HBitmap *hb); + +/** + * hbitmap_serialization_size: + * @hb: HBitmap to operate on. + * @start: Starting bit + * @count: Number of bits + * + * Return number of bytes hbitmap_(de)serialize_part needs + */ +uint64_t hbitmap_serialization_size(const HBitmap *hb, + uint64_t start, uint64_t count); + +/** + * hbitmap_serialize_part + * @hb: HBitmap to operate on. + * @buf: Buffer to store serialized bitmap. + * @start: First bit to store. + * @count: Number of bits to store. + * + * Stores HBitmap data corresponding to given region. The format of saved data + * is linear sequence of bits, so it can be used by hbitmap_deserialize_part + * independently of endianness and size of HBitmap level array elements + */ +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count); + +/** + * hbitmap_deserialize_part + * @hb: HBitmap to operate on. + * @buf: Buffer to restore bitmap data from. + * @start: First bit to restore. + * @count: Number of bits to restore. + * @finish: Whether to call hbitmap_deserialize_finish automatically. + * + * Restores HBitmap data corresponding to given region. The format is the same + * as for hbitmap_serialize_part. + * + * If @finish is false, caller must call hbitmap_serialize_finish before using + * the bitmap. + */ +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count, + bool finish); + +/** + * hbitmap_deserialize_zeroes + * @hb: HBitmap to operate on. + * @start: First bit to restore. + * @count: Number of bits to restore. + * @finish: Whether to call hbitmap_deserialize_finish automatically. + * + * Fills the bitmap with zeroes. + * + * If @finish is false, caller must call hbitmap_serialize_finish before using + * the bitmap. + */ +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t count, + bool finish); + +/** + * hbitmap_deserialize_finish + * @hb: HBitmap to operate on. + * + * Repair HBitmap after calling hbitmap_deserialize_data. Actually, all HBitmap + * layers are restored here. + */ +void hbitmap_deserialize_finish(HBitmap *hb); + /** * hbitmap_free: * @hb: HBitmap to operate on. diff --git a/util/hbitmap.c b/util/hbitmap.c index f303975371..5d1a21ce91 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -397,6 +397,143 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item) return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0; } +uint64_t hbitmap_serialization_granularity(const HBitmap *hb) +{ + /* Require at least 64 bit granularity to be safe on both 64 bit and 32 bit + * hosts. 
*/ + return 64 << hb->granularity; +} + +/* Start should be aligned to serialization granularity, chunk size should be + * aligned to serialization granularity too, except for last chunk. + */ +static void serialization_chunk(const HBitmap *hb, + uint64_t start, uint64_t count, + unsigned long **first_el, uint64_t *el_count) +{ + uint64_t last = start + count - 1; + uint64_t gran = hbitmap_serialization_granularity(hb); + + assert((start & (gran - 1)) == 0); + assert((last >> hb->granularity) < hb->size); + if ((last >> hb->granularity) != hb->size - 1) { + assert((count & (gran - 1)) == 0); + } + + start = (start >> hb->granularity) >> BITS_PER_LEVEL; + last = (last >> hb->granularity) >> BITS_PER_LEVEL; + + *first_el = &hb->levels[HBITMAP_LEVELS - 1][start]; + *el_count = last - start + 1; +} + +uint64_t hbitmap_serialization_size(const HBitmap *hb, + uint64_t start, uint64_t count) +{ + uint64_t el_count; + unsigned long *cur; + + if (!count) { + return 0; + } + serialization_chunk(hb, start, count, &cur, &el_count); + + return el_count * sizeof(unsigned long); +} + +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count) +{ + uint64_t el_count; + unsigned long *cur, *end; + + if (!count) { + return; + } + serialization_chunk(hb, start, count, &cur, &el_count); + end = cur + el_count; + + while (cur != end) { + unsigned long el = + (BITS_PER_LONG == 32 ? cpu_to_le32(*cur) : cpu_to_le64(*cur)); + + memcpy(buf, &el, sizeof(el)); + buf += sizeof(el); + cur++; + } +} + +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count, + bool finish) +{ + uint64_t el_count; + unsigned long *cur, *end; + + if (!count) { + return; + } + serialization_chunk(hb, start, count, &cur, &el_count); + end = cur + el_count; + + while (cur != end) { + memcpy(cur, buf, sizeof(*cur)); + + if (BITS_PER_LONG == 32) { + le32_to_cpus((uint32_t *)cur); + } else { + le64_to_cpus((uint64_t *)cur); + } + + buf += sizeof(unsigned long); + cur++; + } + if (finish) { + hbitmap_deserialize_finish(hb); + } +} + +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t count, + bool finish) +{ + uint64_t el_count; + unsigned long *first; + + if (!count) { + return; + } + serialization_chunk(hb, start, count, &first, &el_count); + + memset(first, 0, el_count * sizeof(unsigned long)); + if (finish) { + hbitmap_deserialize_finish(hb); + } +} + +void hbitmap_deserialize_finish(HBitmap *bitmap) +{ + int64_t i, size, prev_size; + int lev; + + /* restore levels starting from penultimate to zero level, assuming + * that the last level is ok */ + size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); + for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) { + prev_size = size; + size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); + memset(bitmap->levels[lev], 0, size * sizeof(unsigned long)); + + for (i = 0; i < prev_size; ++i) { + if (bitmap->levels[lev + 1][i]) { + bitmap->levels[lev][i >> BITS_PER_LEVEL] |= + 1UL << (i & (BITS_PER_LONG - 1)); + } + } + } + + bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1); +} + void hbitmap_free(HBitmap *hb) { unsigned i; From 882c36f5902a28125e1f98ded74e1a0b3e05e342 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 13 Oct 2016 17:58:28 -0400 Subject: [PATCH 10/13] block: BdrvDirtyBitmap serialization interface Several functions to provide necessary access to BdrvDirtyBitmap for block-migration.c Signed-off-by: Vladimir Sementsov-Ogievskiy [Add the "finish" parameters. 
- Fam] Signed-off-by: Fam Zheng Reviewed-by: John Snow Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-9-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 37 ++++++++++++++++++++++++++++++++++++ include/block/dirty-bitmap.h | 14 ++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 31d5296ef7..384146bbf3 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -453,6 +453,43 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) hbitmap_free(tmp); } +uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count) +{ + return hbitmap_serialization_size(bitmap->bitmap, start, count); +} + +uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) +{ + return hbitmap_serialization_granularity(bitmap->bitmap); +} + +void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count) +{ + hbitmap_serialize_part(bitmap->bitmap, buf, start, count); +} + +void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count, bool finish) +{ + hbitmap_deserialize_part(bitmap->bitmap, buf, start, count, finish); +} + +void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count, + bool finish) +{ + hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish); +} + +void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) +{ + hbitmap_deserialize_finish(bitmap->bitmap); +} + void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sectors) { diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index c4e7858a62..efc2965d35 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -55,4 +55,18 @@ void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); +uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count); +uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count); +void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count, bool finish); +void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count, + bool finish); +void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap); + #endif From 2071f26e2d67fa385c2baf061137b7482ace270c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:29 -0400 Subject: [PATCH 11/13] tests: Add test code for hbitmap serialization Signed-off-by: Fam Zheng [Fixed minor constant issue. 
--js] Signed-off-by: John Snow Signed-off-by: John Snow Message-id: 1476395910-8697-10-git-send-email-jsnow@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- tests/test-hbitmap.c | 156 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c index e3abde1aae..9b7495cc32 100644 --- a/tests/test-hbitmap.c +++ b/tests/test-hbitmap.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "qemu/hbitmap.h" +#include "qemu/bitmap.h" #include "block/block.h" #define LOG_BITS_PER_LONG (BITS_PER_LONG == 32 ? 5 : 6) @@ -737,6 +738,16 @@ static void test_hbitmap_meta_one(TestHBitmapData *data, const void *unused) } } +static void test_hbitmap_serialize_granularity(TestHBitmapData *data, + const void *unused) +{ + int r; + + hbitmap_test_init(data, L3 * 2, 3); + r = hbitmap_serialization_granularity(data->hb); + g_assert_cmpint(r, ==, 64 << 3); +} + static void test_hbitmap_meta_zero(TestHBitmapData *data, const void *unused) { hbitmap_test_init_meta(data, 0, 0, 1); @@ -744,6 +755,142 @@ static void test_hbitmap_meta_zero(TestHBitmapData *data, const void *unused) hbitmap_check_meta(data, 0, 0); } +static void hbitmap_test_serialize_range(TestHBitmapData *data, + uint8_t *buf, size_t buf_size, + uint64_t pos, uint64_t count) +{ + size_t i; + unsigned long *el = (unsigned long *)buf; + + assert(hbitmap_granularity(data->hb) == 0); + hbitmap_reset_all(data->hb); + memset(buf, 0, buf_size); + if (count) { + hbitmap_set(data->hb, pos, count); + } + hbitmap_serialize_part(data->hb, buf, 0, data->size); + + /* Serialized buffer is inherently LE, convert it back manually to test */ + for (i = 0; i < buf_size / sizeof(unsigned long); i++) { + el[i] = (BITS_PER_LONG == 32 ? le32_to_cpu(el[i]) : le64_to_cpu(el[i])); + } + + for (i = 0; i < data->size; i++) { + int is_set = test_bit(i, (unsigned long *)buf); + if (i >= pos && i < pos + count) { + g_assert(is_set); + } else { + g_assert(!is_set); + } + } + + /* Re-serialize for deserialization testing */ + memset(buf, 0, buf_size); + hbitmap_serialize_part(data->hb, buf, 0, data->size); + hbitmap_reset_all(data->hb); + hbitmap_deserialize_part(data->hb, buf, 0, data->size, true); + + for (i = 0; i < data->size; i++) { + int is_set = hbitmap_get(data->hb, i); + if (i >= pos && i < pos + count) { + g_assert(is_set); + } else { + g_assert(!is_set); + } + } +} + +static void test_hbitmap_serialize_basic(TestHBitmapData *data, + const void *unused) +{ + int i, j; + size_t buf_size; + uint8_t *buf; + uint64_t positions[] = { 0, 1, L1 - 1, L1, L2 - 1, L2, L2 + 1, L3 - 1 }; + int num_positions = sizeof(positions) / sizeof(positions[0]); + + hbitmap_test_init(data, L3, 0); + buf_size = hbitmap_serialization_size(data->hb, 0, data->size); + buf = g_malloc0(buf_size); + + for (i = 0; i < num_positions; i++) { + for (j = 0; j < num_positions; j++) { + hbitmap_test_serialize_range(data, buf, buf_size, + positions[i], + MIN(positions[j], L3 - positions[i])); + } + } + + g_free(buf); +} + +static void test_hbitmap_serialize_part(TestHBitmapData *data, + const void *unused) +{ + int i, j, k; + size_t buf_size; + uint8_t *buf; + uint64_t positions[] = { 0, 1, L1 - 1, L1, L2 - 1, L2, L2 + 1, L3 - 1 }; + int num_positions = sizeof(positions) / sizeof(positions[0]); + + hbitmap_test_init(data, L3, 0); + buf_size = L2; + buf = g_malloc0(buf_size); + + for (i = 0; i < num_positions; i++) { + hbitmap_set(data->hb, positions[i], 1); + } + + for (i = 0; i < data->size; i += buf_size) { + unsigned long 
*el = (unsigned long *)buf; + hbitmap_serialize_part(data->hb, buf, i, buf_size); + for (j = 0; j < buf_size / sizeof(unsigned long); j++) { + el[j] = (BITS_PER_LONG == 32 ? le32_to_cpu(el[j]) : le64_to_cpu(el[j])); + } + + for (j = 0; j < buf_size; j++) { + bool should_set = false; + for (k = 0; k < num_positions; k++) { + if (positions[k] == j + i) { + should_set = true; + break; + } + } + g_assert_cmpint(should_set, ==, test_bit(j, (unsigned long *)buf)); + } + } + + g_free(buf); +} + +static void test_hbitmap_serialize_zeroes(TestHBitmapData *data, + const void *unused) +{ + int i; + HBitmapIter iter; + int64_t next; + uint64_t min_l1 = MAX(L1, 64); + uint64_t positions[] = { 0, min_l1, L2, L3 - min_l1}; + int num_positions = sizeof(positions) / sizeof(positions[0]); + + hbitmap_test_init(data, L3, 0); + + for (i = 0; i < num_positions; i++) { + hbitmap_set(data->hb, positions[i], L1); + } + + for (i = 0; i < num_positions; i++) { + hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true); + hbitmap_iter_init(&iter, data->hb, 0); + next = hbitmap_iter_next(&iter); + if (i == num_positions - 1) { + g_assert_cmpint(next, ==, -1); + } else { + g_assert_cmpint(next, ==, positions[i + 1]); + } + } +} + static void hbitmap_test_add(const char *testpath, void (*test_func)(TestHBitmapData *data, const void *user_data)) { @@ -799,6 +946,15 @@ int main(int argc, char **argv) hbitmap_test_add("/hbitmap/meta/byte", test_hbitmap_meta_byte); hbitmap_test_add("/hbitmap/meta/word", test_hbitmap_meta_word); hbitmap_test_add("/hbitmap/meta/sector", test_hbitmap_meta_sector); + + hbitmap_test_add("/hbitmap/serialize/granularity", + test_hbitmap_serialize_granularity); + hbitmap_test_add("/hbitmap/serialize/basic", + test_hbitmap_serialize_basic); + hbitmap_test_add("/hbitmap/serialize/part", + test_hbitmap_serialize_part); + hbitmap_test_add("/hbitmap/serialize/zeroes", + test_hbitmap_serialize_zeroes); g_test_run(); return 0; From 6d3f4049ba532b01ff738f9e5a66df1477c5ffdd Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:30 -0400 Subject: [PATCH 12/13] block: More operations for meta dirty bitmap Callers can create an iterator of meta bitmap with bdrv_dirty_meta_iter_new(), then use the bdrv_dirty_iter_* operations on it. Meta iterators are also counted by bitmap->active_iterators. Also add a couple of functions to retrieve granularity and count. 
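A hedged sketch of a possible consumer (for instance, dirty bitmap migration code that is not part of this patch) draining the meta bitmap:

    BdrvDirtyBitmapIter *meta_iter = bdrv_dirty_meta_iter_new(bitmap);
    int64_t sector;

    while ((sector = bdrv_dirty_iter_next(meta_iter)) != -1) {
        /* this chunk of the dirty bitmap has changed; re-send it ... */
    }
    bdrv_dirty_iter_free(meta_iter);

bdrv_get_meta_dirty_count() and bdrv_dirty_bitmap_meta_granularity() expose the dirty count and granularity of the meta bitmap, mirroring the existing getters for the dirty bitmap itself.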
Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-11-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 19 +++++++++++++++++++ include/block/dirty-bitmap.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 384146bbf3..519737c8d3 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -393,6 +393,11 @@ uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } +uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap) +{ + return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->meta); +} + BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector) { @@ -403,6 +408,15 @@ BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, return iter; } +BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap) +{ + BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1); + hbitmap_iter_init(&iter->hbi, bitmap->meta, 0); + iter->bitmap = bitmap; + bitmap->active_iterators++; + return iter; +} + void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) { if (!iter) { @@ -514,3 +528,8 @@ int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) { return hbitmap_count(bitmap->bitmap); } + +int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap) +{ + return hbitmap_count(bitmap->meta); +} diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index efc2965d35..9dea14ba03 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -30,6 +30,7 @@ void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); +uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap); @@ -47,12 +48,14 @@ int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector, int nb_sectors); +BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap); BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector); void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); +int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, From f4f2539bcf2d41f96d7e60c4c7e81ecdf5d81c63 Mon Sep 17 00:00:00 2001 From: Changlong Xie Date: Wed, 12 Oct 2016 12:50:08 +0800 Subject: [PATCH 13/13] block/replication: Clarify 'top-id' parameter usage The replication driver only supports the 'top-id' parameter for the secondary side; it must not be supplied for the primary side. 
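Illustratively (the node name and the elided options are placeholders, not taken from this patch):

    driver=replication,mode=secondary,top-id=active-disk0,...
        accepted: top-id names the root of the secondary's replication node chain
    driver=replication,mode=primary,top-id=active-disk0,...
        rejected with "The primary side does not support option top-id"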
Reviewed-by: Eric Blake Signed-off-by: Changlong Xie Message-id: 1476247808-15646-1-git-send-email-xiecl.fnst@cn.fujitsu.com Signed-off-by: Max Reitz --- block/replication.c | 5 +++++ qapi/block-core.json | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/block/replication.c b/block/replication.c index 3bd1cf1809..8bbfc8f870 100644 --- a/block/replication.c +++ b/block/replication.c @@ -101,6 +101,11 @@ static int replication_open(BlockDriverState *bs, QDict *options, if (!strcmp(mode, "primary")) { s->mode = REPLICATION_MODE_PRIMARY; + top_id = qemu_opt_get(opts, REPLICATION_TOP_ID); + if (top_id) { + error_setg(&local_err, "The primary side does not support option top-id"); + goto fail; + } } else if (!strcmp(mode, "secondary")) { s->mode = REPLICATION_MODE_SECONDARY; top_id = qemu_opt_get(opts, REPLICATION_TOP_ID); diff --git a/qapi/block-core.json b/qapi/block-core.json index c59047bc94..97b120532a 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2197,7 +2197,8 @@ # @mode: the replication mode # # @top-id: #optional In secondary mode, node name or device ID of the root -# node who owns the replication node chain. Ignored in primary mode. +# node who owns the replication node chain. Must not be given in +# primary mode. # # Since: 2.8 ##