diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index b91499410c..c5669eaa51 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -2437,10 +2437,163 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, return cluster << s->cluster_bits; } +/* + * Helper function for rebuild_refcount_structure(). + * + * Scan the range of clusters [first_cluster, end_cluster) for allocated + * clusters and write all corresponding refblocks to disk. The refblock + * and allocation data is taken from the in-memory refcount table + * *refcount_table[] (of size *nb_clusters), which is basically one big + * (unlimited size) refblock for the whole image. + * + * For these refblocks, clusters are allocated using said in-memory + * refcount table. Care is taken that these allocations are reflected + * in the refblocks written to disk. + * + * The refblocks' offsets are written into a reftable, which is + * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If + * that reftable is of insufficient size, it will be resized to fit. + * This reftable is not written to disk. + * + * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed + * to point to existing valid refblocks that do not need to be allocated + * again.) + * + * Return whether the on-disk reftable array was resized (true/false), + * or -errno on error. + */ +static int rebuild_refcounts_write_refblocks( + BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, + int64_t first_cluster, int64_t end_cluster, + uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr + ) +{ + BDRVQcow2State *s = bs->opaque; + int64_t cluster; + int64_t refblock_offset, refblock_start, refblock_index; + int64_t first_free_cluster = 0; + uint64_t *on_disk_reftable = *on_disk_reftable_ptr; + uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr; + void *on_disk_refblock; + bool reftable_grown = false; + int ret; + + for (cluster = first_cluster; cluster < end_cluster; cluster++) { + /* Check all clusters to find refblocks that contain non-zero entries */ + if (!s->get_refcount(*refcount_table, cluster)) { + continue; + } + + /* + * This cluster is allocated, so we need to create a refblock + * for it. The data we will write to disk is just the + * respective slice from *refcount_table, so it will contain + * accurate refcounts for all clusters belonging to this + * refblock. After we have written it, we will therefore skip + * all remaining clusters in this refblock. + */ + + refblock_index = cluster >> s->refcount_block_bits; + refblock_start = refblock_index << s->refcount_block_bits; + + if (on_disk_reftable_entries > refblock_index && + on_disk_reftable[refblock_index]) + { + /* + * We can get here after a `goto write_refblocks`: We have a + * reftable from a previous run, and the refblock is already + * allocated. No need to allocate it again. + */ + refblock_offset = on_disk_reftable[refblock_index]; + } else { + int64_t refblock_cluster_index; + + /* Don't allocate a cluster in a refblock already written to disk */ + if (first_free_cluster < refblock_start) { + first_free_cluster = refblock_start; + } + refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, + nb_clusters, + &first_free_cluster); + if (refblock_offset < 0) { + fprintf(stderr, "ERROR allocating refblock: %s\n", + strerror(-refblock_offset)); + return refblock_offset; + } + + refblock_cluster_index = refblock_offset / s->cluster_size; + if (refblock_cluster_index >= end_cluster) { + /* + * We must write the refblock that holds this refblock's + * refcount + */ + end_cluster = refblock_cluster_index + 1; + } + + if (on_disk_reftable_entries <= refblock_index) { + on_disk_reftable_entries = + ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, + s->cluster_size) / REFTABLE_ENTRY_SIZE; + on_disk_reftable = + g_try_realloc(on_disk_reftable, + on_disk_reftable_entries * + REFTABLE_ENTRY_SIZE); + if (!on_disk_reftable) { + return -ENOMEM; + } + + memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0, + (on_disk_reftable_entries - + *on_disk_reftable_entries_ptr) * + REFTABLE_ENTRY_SIZE); + + *on_disk_reftable_ptr = on_disk_reftable; + *on_disk_reftable_entries_ptr = on_disk_reftable_entries; + + reftable_grown = true; + } else { + assert(on_disk_reftable); + } + on_disk_reftable[refblock_index] = refblock_offset; + } + + /* Refblock is allocated, write it to disk */ + + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); + return ret; + } + + /* + * The refblock is simply a slice of *refcount_table. + * Note that the size of *refcount_table is always aligned to + * whole clusters, so the write operation will not result in + * out-of-bounds accesses. + */ + on_disk_refblock = (void *)((char *) *refcount_table + + refblock_index * s->cluster_size); + + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + s->cluster_size); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); + return ret; + } + + /* This refblock is done, skip to its end */ + cluster = refblock_start + s->refcount_block_size - 1; + } + + return reftable_grown; +} + /* * Creates a new refcount structure based solely on the in-memory information - * given through *refcount_table. All necessary allocations will be reflected - * in that array. + * given through *refcount_table (this in-memory information is basically just + * the concatenation of all refblocks). All necessary allocations will be + * reflected in that array. * * On success, the old refcount structure is leaked (it will be covered by the * new refcount structure). @@ -2451,12 +2604,14 @@ static int rebuild_refcount_structure(BlockDriverState *bs, int64_t *nb_clusters) { BDRVQcow2State *s = bs->opaque; - int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; - int64_t refblock_offset, refblock_start, refblock_index; - uint32_t reftable_size = 0; + int64_t reftable_offset = -1; + int64_t reftable_length = 0; + int64_t reftable_clusters; + int64_t refblock_index; + uint32_t on_disk_reftable_entries = 0; uint64_t *on_disk_reftable = NULL; - void *on_disk_refblock; int ret = 0; + int reftable_size_changed = 0; struct { uint64_t reftable_offset; uint32_t reftable_clusters; @@ -2464,109 +2619,59 @@ static int rebuild_refcount_structure(BlockDriverState *bs, qcow2_cache_empty(bs, s->refcount_block_cache); -write_refblocks: - for (; cluster < *nb_clusters; cluster++) { - if (!s->get_refcount(*refcount_table, cluster)) { - continue; - } + /* + * For each refblock containing entries, we try to allocate a + * cluster (in the in-memory refcount table) and write its offset + * into on_disk_reftable[]. We then write the whole refblock to + * disk (as a slice of the in-memory refcount table). + * This is done by rebuild_refcounts_write_refblocks(). + * + * Once we have scanned all clusters, we try to find space for the + * reftable. This will dirty the in-memory refcount table (i.e. + * make it differ from the refblocks we have already written), so we + * need to run rebuild_refcounts_write_refblocks() again for the + * range of clusters where the reftable has been allocated. + * + * This second run might make the reftable grow again, in which case + * we will need to allocate another space for it, which is why we + * repeat all this until the reftable stops growing. + * + * (This loop will terminate, because with every cluster the + * reftable grows, it can accomodate a multitude of more refcounts, + * so that at some point this must be able to cover the reftable + * and all refblocks describing it.) + * + * We then convert the reftable to big-endian and write it to disk. + * + * Note that we never free any reftable allocations. Doing so would + * needlessly complicate the algorithm: The eventual second check + * run we do will clean up all leaks we have caused. + */ - refblock_index = cluster >> s->refcount_block_bits; - refblock_start = refblock_index << s->refcount_block_bits; - - /* Don't allocate a cluster in a refblock already written to disk */ - if (first_free_cluster < refblock_start) { - first_free_cluster = refblock_start; - } - refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, - nb_clusters, &first_free_cluster); - if (refblock_offset < 0) { - fprintf(stderr, "ERROR allocating refblock: %s\n", - strerror(-refblock_offset)); - res->check_errors++; - ret = refblock_offset; - goto fail; - } - - if (reftable_size <= refblock_index) { - uint32_t old_reftable_size = reftable_size; - uint64_t *new_on_disk_reftable; - - reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, - s->cluster_size) / REFTABLE_ENTRY_SIZE; - new_on_disk_reftable = g_try_realloc(on_disk_reftable, - reftable_size * - REFTABLE_ENTRY_SIZE); - if (!new_on_disk_reftable) { - res->check_errors++; - ret = -ENOMEM; - goto fail; - } - on_disk_reftable = new_on_disk_reftable; - - memset(on_disk_reftable + old_reftable_size, 0, - (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE); - - /* The offset we have for the reftable is now no longer valid; - * this will leak that range, but we can easily fix that by running - * a leak-fixing check after this rebuild operation */ - reftable_offset = -1; - } else { - assert(on_disk_reftable); - } - on_disk_reftable[refblock_index] = refblock_offset; - - /* If this is apparently the last refblock (for now), try to squeeze the - * reftable in */ - if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && - reftable_offset < 0) - { - uint64_t reftable_clusters = size_to_clusters(s, reftable_size * - REFTABLE_ENTRY_SIZE); - reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, - refcount_table, nb_clusters, - &first_free_cluster); - if (reftable_offset < 0) { - fprintf(stderr, "ERROR allocating reftable: %s\n", - strerror(-reftable_offset)); - res->check_errors++; - ret = reftable_offset; - goto fail; - } - } - - ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, - s->cluster_size, false); - if (ret < 0) { - fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); - goto fail; - } - - /* The size of *refcount_table is always cluster-aligned, therefore the - * write operation will not overflow */ - on_disk_refblock = (void *)((char *) *refcount_table + - refblock_index * s->cluster_size); - - ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, - s->cluster_size); - if (ret < 0) { - fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); - goto fail; - } - - /* Go to the end of this refblock */ - cluster = refblock_start + s->refcount_block_size - 1; + reftable_size_changed = + rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, + 0, *nb_clusters, + &on_disk_reftable, + &on_disk_reftable_entries); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; + goto fail; } - if (reftable_offset < 0) { - uint64_t post_refblock_start, reftable_clusters; + /* + * There was no reftable before, so rebuild_refcounts_write_refblocks() + * must have increased its size (from 0 to something). + */ + assert(reftable_size_changed); + + do { + int64_t reftable_start_cluster, reftable_end_cluster; + int64_t first_free_cluster = 0; + + reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE; + reftable_clusters = size_to_clusters(s, reftable_length); - post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); - reftable_clusters = - size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE); - /* Not pretty but simple */ - if (first_free_cluster < post_refblock_start) { - first_free_cluster = post_refblock_start; - } reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, refcount_table, nb_clusters, &first_free_cluster); @@ -2578,24 +2683,55 @@ write_refblocks: goto fail; } - goto write_refblocks; - } + /* + * We need to update the affected refblocks, so re-run the + * write_refblocks loop for the reftable's range of clusters. + */ + assert(offset_into_cluster(s, reftable_offset) == 0); + reftable_start_cluster = reftable_offset / s->cluster_size; + reftable_end_cluster = reftable_start_cluster + reftable_clusters; + reftable_size_changed = + rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, + reftable_start_cluster, + reftable_end_cluster, + &on_disk_reftable, + &on_disk_reftable_entries); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; + goto fail; + } - for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { + /* + * If the reftable size has changed, we will need to find a new + * allocation, repeating the loop. + */ + } while (reftable_size_changed); + + /* The above loop must have run at least once */ + assert(reftable_offset >= 0); + + /* + * All allocations are done, all refblocks are written, convert the + * reftable to big-endian and write it to disk. + */ + + for (refblock_index = 0; refblock_index < on_disk_reftable_entries; + refblock_index++) + { cpu_to_be64s(&on_disk_reftable[refblock_index]); } - ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, - reftable_size * REFTABLE_ENTRY_SIZE, + ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, false); if (ret < 0) { fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); goto fail; } - assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE); + assert(reftable_length < INT_MAX); ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, - reftable_size * REFTABLE_ENTRY_SIZE); + reftable_length); if (ret < 0) { fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); goto fail; @@ -2604,7 +2740,7 @@ write_refblocks: /* Enter new reftable into the image header */ reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); reftable_offset_and_clusters.reftable_clusters = - cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE)); + cpu_to_be32(reftable_clusters); ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset), &reftable_offset_and_clusters, @@ -2614,12 +2750,14 @@ write_refblocks: goto fail; } - for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { + for (refblock_index = 0; refblock_index < on_disk_reftable_entries; + refblock_index++) + { be64_to_cpus(&on_disk_reftable[refblock_index]); } s->refcount_table = on_disk_reftable; s->refcount_table_offset = reftable_offset; - s->refcount_table_size = reftable_size; + s->refcount_table_size = on_disk_reftable_entries; update_max_refcount_table_index(s); return 0;