qcow2: use one single memory block for the L2/refcount cache tables

The qcow2 L2/refcount cache contains one separate table for each cache entry. Doing one allocation per table adds unnecessary overhead and it also requires us to store the address of each table separately. Since the size of the cache is constant during its lifetime, it's better to have an array that contains all the tables using one single allocation. In my tests measuring freshly created caches with sizes 128MB (L2) and 32MB (refcount), this uses around 10MB less RAM. Signed-off-by: Alberto Garcia <berto@igalia.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-05-11 15:54:53 +03:00 · 2015-05-11 15:54:53 +03:00 · 72e80b8901
parent 13c4941cdd
commit 72e80b8901
4 changed files with 38 additions and 38 deletions
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@ -28,7 +28,6 @@
 #include "trace.h"

 typedef struct Qcow2CachedTable {
-    void*   table;
    int64_t offset;
    bool    dirty;
    int     cache_hits;
@ -40,39 +39,35 @@ struct Qcow2Cache {
    struct Qcow2Cache*      depends;
    int                     size;
    bool                    depends_on_flush;
+    void                   *table_array;
 };

+static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
+                    Qcow2Cache *c, int table)
+{
+    BDRVQcowState *s = bs->opaque;
+    return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
+}
+
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
 {
    BDRVQcowState *s = bs->opaque;
    Qcow2Cache *c;
-    int i;

    c = g_new0(Qcow2Cache, 1);
    c->size = num_tables;
    c->entries = g_try_new0(Qcow2CachedTable, num_tables);
-    if (!c->entries) {
-        goto fail;
-    }
+    c->table_array = qemu_try_blockalign(bs->file,
+                                         (size_t) num_tables * s->cluster_size);

-    for (i = 0; i < c->size; i++) {
-        c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size);
-        if (c->entries[i].table == NULL) {
-            goto fail;
-        }
+    if (!c->entries || !c->table_array) {
+        qemu_vfree(c->table_array);
+        g_free(c->entries);
+        g_free(c);
+        c = NULL;
    }

    return c;
-
-fail:
-    if (c->entries) {
-        for (i = 0; i < c->size; i++) {
-            qemu_vfree(c->entries[i].table);
-        }
-    }
-    g_free(c->entries);
-    g_free(c);
-    return NULL;
 }

 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
@ -81,9 +76,9 @@ int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)

    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
-        qemu_vfree(c->entries[i].table);
    }

+    qemu_vfree(c->table_array);
    g_free(c->entries);
    g_free(c);

@ -151,8 +146,8 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    }

-    ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
-        s->cluster_size);
+    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
+                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
    if (ret < 0) {
        return ret;
    }
@ -304,7 +299,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
        }

-        ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
+        ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
+                         s->cluster_size);
        if (ret < 0) {
            return ret;
        }
@ -319,7 +315,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
 found:
    c->entries[i].cache_hits++;
    c->entries[i].ref++;
-    *table = c->entries[i].table;
+    *table = qcow2_cache_get_table_addr(bs, c, i);

    trace_qcow2_cache_get_done(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);
@ -344,7 +340,7 @@ int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
    int i;

    for (i = 0; i < c->size; i++) {
-        if (c->entries[i].table == *table) {
+        if (qcow2_cache_get_table_addr(bs, c, i) == *table) {
            goto found;
        }
    }
@ -358,12 +354,13 @@ found:
    return 0;
 }

-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
+void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
+     void *table)
 {
    int i;

    for (i = 0; i < c->size; i++) {
-        if (c->entries[i].table == table) {
+        if (qcow2_cache_get_table_addr(bs, c, i) == table) {
            goto found;
        }
    }
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@ -263,7 +263,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
    BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);

    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
    ret = qcow2_cache_flush(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
@ -692,7 +692,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
    /* compressed clusters never have the copied flag */

    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
    l2_table[l2_index] = cpu_to_be64(cluster_offset);
    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    if (ret < 0) {
@ -771,7 +771,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
    if (ret < 0) {
        goto err;
    }
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);

    assert(l2_index + m->nb_clusters <= s->l2_size);
    for (i = 0; i < m->nb_clusters; i++) {
@ -1470,7 +1470,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
        }

        /* First remove L2 entries */
-        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
        if (!full_discard && s->qcow_version >= 3) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
        } else {
@ -1558,7 +1558,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
        old_offset = be64_to_cpu(l2_table[l2_index + i]);

        /* Update L2 entries */
-        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
        if (old_offset & QCOW_OFLAG_COMPRESSED) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
@ -1760,7 +1760,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,

        if (is_active_l1) {
            if (l2_dirty) {
-                qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+                qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
                qcow2_cache_depends_on_flush(s->l2_table_cache);
            }
            ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@ -424,7 +424,7 @@ static int alloc_refcount_block(BlockDriverState *bs,

    /* Now the new refcount block needs to be written to disk */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
-    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
+    qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail_block;
@ -737,7 +737,8 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
        }
        old_table_index = table_index;

-        qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
+        qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+                                     refcount_block);

        /* we can update the count and save it */
        block_index = cluster_index & (s->refcount_block_size - 1);
@ -1182,7 +1183,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                            s->refcount_block_cache);
                    }
                    l2_table[j] = cpu_to_be64(offset);
-                    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+                    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache,
+                                                 l2_table);
                }
            }

--- a/block/qcow2.h
+++ b/block/qcow2.h
@ -574,7 +574,8 @@ int qcow2_read_snapshots(BlockDriverState *bs);
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);

-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
+void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
+     void *table);
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
    Qcow2Cache *dependency);