qcow2: Don't rely on free_cluster_index in alloc_refcount_block() (CVE-2014-0147)

free_cluster_index is only correct if update_refcount() was called from
an allocation function, and even there it's brittle because it's used to
protect unfinished allocations which still have a refcount of 0 - if it
moves in the wrong place, the unfinished allocation can be corrupted.

So not using it any more seems to be a good idea. Instead, use the
first requested cluster to do the calculations. Return -EAGAIN if
unfinished allocations could become invalid and let the caller restart
its search for some free clusters.

The context of creating a snapsnot is one situation where
update_refcount() is called outside of a cluster allocation. For this
case, the change fixes a buffer overflow if a cluster is referenced in
an L2 table that cannot be represented by an existing refcount block.
(new_table[refcount_table_index] was out of bounds)

[Bump the qemu-iotests 026 refblock_alloc.write leak count from 10 to
11.
--Stefan]

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit b106ad9185)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
This commit is contained in:
Kevin Wolf 2014-03-28 18:06:31 +01:00 committed by Michael Roth
parent aeba41549d
commit ffa3ab0217
6 changed files with 65 additions and 44 deletions

View File

@ -193,10 +193,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
* they can describe them themselves. * they can describe them themselves.
* *
* - We need to consider that at this point we are inside update_refcounts * - We need to consider that at this point we are inside update_refcounts
* and doing the initial refcount increase. This means that some clusters * and potentially doing an initial refcount increase. This means that
* have already been allocated by the caller, but their refcount isn't * some clusters have already been allocated by the caller, but their
* accurate yet. free_cluster_index tells us where this allocation ends * refcount isn't accurate yet. If we allocate clusters for metadata, we
* as long as we don't overwrite it by freeing clusters. * need to return -EAGAIN to signal the caller that it needs to restart
* the search for free clusters.
* *
* - alloc_clusters_noref and qcow2_free_clusters may load a different * - alloc_clusters_noref and qcow2_free_clusters may load a different
* refcount block into the cache * refcount block into the cache
@ -281,7 +282,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
} }
s->refcount_table[refcount_table_index] = new_block; s->refcount_table[refcount_table_index] = new_block;
return 0;
/* The new refcount block may be where the caller intended to put its
* data, so let it restart the search. */
return -EAGAIN;
} }
ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block); ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
@ -304,8 +308,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* Calculate the number of refcount blocks needed so far */ /* Calculate the number of refcount blocks needed so far */
uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT); uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
uint64_t blocks_used = (s->free_cluster_index + uint64_t blocks_used = DIV_ROUND_UP(cluster_index, refcount_block_clusters);
refcount_block_clusters - 1) / refcount_block_clusters;
/* And now we need at least one block more for the new metadata */ /* And now we need at least one block more for the new metadata */
uint64_t table_size = next_refcount_table_size(s, blocks_used + 1); uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
@ -338,8 +341,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size); uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t)); uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
/* Fill the new refcount table */ /* Fill the new refcount table */
memcpy(new_table, s->refcount_table, memcpy(new_table, s->refcount_table,
s->refcount_table_size * sizeof(uint64_t)); s->refcount_table_size * sizeof(uint64_t));
@ -402,18 +403,19 @@ static int alloc_refcount_block(BlockDriverState *bs,
s->refcount_table_size = table_size; s->refcount_table_size = table_size;
s->refcount_table_offset = table_offset; s->refcount_table_offset = table_offset;
/* Free old table. Remember, we must not change free_cluster_index */ /* Free old table. */
uint64_t old_free_cluster_index = s->free_cluster_index;
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t), qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER); QCOW2_DISCARD_OTHER);
s->free_cluster_index = old_free_cluster_index;
ret = load_refcount_block(bs, new_block, (void**) refcount_block); ret = load_refcount_block(bs, new_block, (void**) refcount_block);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
return 0; /* If we were trying to do the initial refcount update for some cluster
* allocation, we might have used the same clusters to store newly
* allocated metadata. Make the caller search some new space. */
return -EAGAIN;
fail_table: fail_table:
g_free(new_table); g_free(new_table);
@ -659,12 +661,15 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
int ret; int ret;
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
offset = alloc_clusters_noref(bs, size); do {
if (offset < 0) { offset = alloc_clusters_noref(bs, size);
return offset; if (offset < 0) {
} return offset;
}
ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
} while (ret == -EAGAIN);
ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
@ -677,7 +682,6 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
{ {
BDRVQcowState *s = bs->opaque; BDRVQcowState *s = bs->opaque;
uint64_t cluster_index; uint64_t cluster_index;
uint64_t old_free_cluster_index;
uint64_t i; uint64_t i;
int refcount, ret; int refcount, ret;
@ -686,30 +690,28 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
return 0; return 0;
} }
/* Check how many clusters there are free */ do {
cluster_index = offset >> s->cluster_bits; /* Check how many clusters there are free */
for(i = 0; i < nb_clusters; i++) { cluster_index = offset >> s->cluster_bits;
refcount = get_refcount(bs, cluster_index++); for(i = 0; i < nb_clusters; i++) {
refcount = get_refcount(bs, cluster_index++);
if (refcount < 0) { if (refcount < 0) {
return refcount; return refcount;
} else if (refcount != 0) { } else if (refcount != 0) {
break; break;
}
} }
}
/* And then allocate them */ /* And then allocate them */
old_free_cluster_index = s->free_cluster_index; ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
s->free_cluster_index = cluster_index + i; QCOW2_DISCARD_NEVER);
} while (ret == -EAGAIN);
ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
QCOW2_DISCARD_NEVER);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
s->free_cluster_index = old_free_cluster_index;
return i; return i;
} }

View File

@ -1587,7 +1587,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
*/ */
BlockDriverState* bs; BlockDriverState* bs;
QCowHeader *header; QCowHeader *header;
uint8_t* refcount_table; uint64_t* refcount_table;
Error *local_err = NULL; Error *local_err = NULL;
int ret; int ret;
@ -1637,9 +1637,10 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out; goto out;
} }
/* Write an empty refcount table */ /* Write a refcount table with one refcount block */
refcount_table = g_malloc0(cluster_size); refcount_table = g_malloc0(2 * cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size); refcount_table[0] = cpu_to_be64(2 * cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
g_free(refcount_table); g_free(refcount_table);
if (ret < 0) { if (ret < 0) {
@ -1663,7 +1664,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out; goto out;
} }
ret = qcow2_alloc_clusters(bs, 2 * cluster_size); ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
if (ret < 0) { if (ret < 0) {
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
"header and refcount table"); "header and refcount table");

View File

@ -475,7 +475,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write
write failed: No space left on device write failed: No space left on device
10 leaked clusters were found on the image. 11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data. This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
@ -499,7 +499,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write
write failed: No space left on device write failed: No space left on device
10 leaked clusters were found on the image. 11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data. This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
@ -523,7 +523,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write
write failed: No space left on device write failed: No space left on device
10 leaked clusters were found on the image. 11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data. This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824

View File

@ -1,6 +1,6 @@
No errors were found on the image. No errors were found on the image.
7292415/33554432 = 21.73% allocated, 0.00% fragmented, 0.00% compressed clusters 7292415/33554432 = 21.73% allocated, 0.00% fragmented, 0.00% compressed clusters
Image end offset: 4296448000 Image end offset: 4296152064
. .
---------------------------------------------------------------------- ----------------------------------------------------------------------
Ran 1 tests Ran 1 tests

View File

@ -56,6 +56,8 @@ offset_header_size=100
offset_ext_magic=$header_size offset_ext_magic=$header_size
offset_ext_size=$((header_size + 4)) offset_ext_size=$((header_size + 4))
offset_l2_table_0=$((0x40000))
echo echo
echo "== Huge header size ==" echo "== Huge header size =="
_make_test_img 64M _make_test_img 64M
@ -143,6 +145,15 @@ poke_file "$TEST_IMG" "$offset_backing_file_offset" "\x00\x00\x00\x00\x00\x00\x1
poke_file "$TEST_IMG" "$offset_backing_file_size" "\xff\xff\xff\xff" poke_file "$TEST_IMG" "$offset_backing_file_size" "\xff\xff\xff\xff"
{ $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir { $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
echo
echo "== Invalid L2 entry (huge physical offset) =="
_make_test_img 64M
{ $QEMU_IO -c "write 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
poke_file "$TEST_IMG" "$offset_l2_table_0" "\xbf\xff\xff\xff\xff\xff\x00\x00"
{ $QEMU_IMG snapshot -c test $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
poke_file "$TEST_IMG" "$offset_l2_table_0" "\x80\x00\x00\xff\xff\xff\x00\x00"
{ $QEMU_IMG snapshot -c test $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
# success, all done # success, all done
echo "*** done" echo "*** done"
rm -f $seq.full rm -f $seq.full

View File

@ -63,4 +63,11 @@ no file open, try 'help open'
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
qemu-io: can't open device TEST_DIR/t.qcow2: Backing file name too long qemu-io: can't open device TEST_DIR/t.qcow2: Backing file name too long
no file open, try 'help open' no file open, try 'help open'
== Invalid L2 entry (huge physical offset) ==
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
qemu-img: Could not create snapshot 'test': -27 (File too large)
qemu-img: Could not create snapshot 'test': -11 (Resource temporarily unavailable)
*** done *** done