From 8af037fe4cfeb88bbcded3122cec2c5be0b90907 Mon Sep 17 00:00:00 2001 From: Lukas Tschoke Date: Sat, 8 Apr 2023 00:11:38 +0200 Subject: [PATCH 01/10] block/vhdx: fix dynamic VHDX BAT corruption The corruption occurs when a BAT entry aligned to 4096 bytes is changed. Specifically, the corruption occurs during the creation of the LOG Data Descriptor. The incorrect behavior involves copying 4088 bytes from the original 4096 bytes aligned offset to `tmp[8..4096]` and then copying the new value for the first BAT entry to the beginning `tmp[0..8]`. This results in all existing BAT entries inside the 4K region being incorrectly moved by 8 bytes and the last entry being lost. This bug did not cause noticeable corruption when only sequentially writing once to an empty dynamic VHDX (e.g. using `qemu-img convert -O vhdx -o subformat=dynamic ...`), but it still resulted in invalid values for the (unused) Sector Bitmap BAT entries. Importantly, this corruption would only become noticeable after the corrupted BAT is re-read from the file. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/727 Cc: qemu-stable@nongnu.org Signed-off-by: Lukas Tschoke Message-Id: <6cfb6d6b-adc5-7772-c8a5-6bae9a0ad668@gmail.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/vhdx-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/vhdx-log.c b/block/vhdx-log.c index c48cf65d62..38148f107a 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -981,7 +981,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, sector_write = merged_sector; } else if (i == sectors - 1 && trailing_length) { /* partial sector at the end of the buffer */ - ret = bdrv_pread(bs->file, file_offset, + ret = bdrv_pread(bs->file, file_offset + trailing_length, VHDX_LOG_SECTOR_SIZE - trailing_length, merged_sector + trailing_length, 0); if (ret < 0) { From 2b1f8fcb847f8cc7a214e14cbbdf99b354a4f8e3 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 11 Apr 2023 13:52:31 +0200 Subject: [PATCH 02/10] iotests: Regression test for vhdx log corruption Signed-off-by: Kevin Wolf Message-Id: <20230411115231.90398-1-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/tests/regression-vhdx-log | 62 +++++++++++++++++++ .../tests/regression-vhdx-log.out | 14 +++++ 2 files changed, 76 insertions(+) create mode 100755 tests/qemu-iotests/tests/regression-vhdx-log create mode 100644 tests/qemu-iotests/tests/regression-vhdx-log.out diff --git a/tests/qemu-iotests/tests/regression-vhdx-log b/tests/qemu-iotests/tests/regression-vhdx-log new file mode 100755 index 0000000000..ca264e93d6 --- /dev/null +++ b/tests/qemu-iotests/tests/regression-vhdx-log @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# group: rw auto quick +# +# vhdx regression test: Updating the first entry of a BAT sector corrupted the +# following entries. +# +# Copyright (C) 2023 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +cd .. +. ./common.rc +. ./common.filter + +_supported_fmt generic +_supported_proto generic +_unsupported_imgopts "subformat=streamOptimized" + +size=64M +_make_test_img $size + +echo +echo "creating pattern" +$QEMU_IO -c "write -P 1 32M 4k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -P 2 0 4k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 1 32M 4k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "checking image for errors" +_check_test_img + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/tests/regression-vhdx-log.out b/tests/qemu-iotests/tests/regression-vhdx-log.out new file mode 100644 index 0000000000..350c257354 --- /dev/null +++ b/tests/qemu-iotests/tests/regression-vhdx-log.out @@ -0,0 +1,14 @@ +QA output created by regression-vhdx-log +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + +creating pattern +wrote 4096/4096 bytes at offset 33554432 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 4096/4096 bytes at offset 0 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 33554432 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +checking image for errors +No errors were found on the image. +*** done From 160a29e2f8b2d100246ab446813409f72d1e0767 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 7 Apr 2023 17:32:56 +0200 Subject: [PATCH 03/10] block: move has_variable_length to BlockLimits At the protocol level, has_variable_length only needs to be true in the very special case of host CD-ROM drives, so that they do not need an explicit monitor command to read the new size when a disc is loaded in the tray. However, at the format level has_variable_length has to be true for all raw blockdevs and for all filters, even though in practice the length depends on the underlying file and thus will not change except in the case of host CD-ROM drives. As a first step towards computing an accurate value of has_variable_length, add the value into the BlockLimits structure and initialize the field from the BlockDriver. Signed-off-by: Paolo Bonzini Message-Id: <20230407153303.391121-2-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 2 +- block/io.c | 6 ++++++ include/block/block_int-common.h | 8 ++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index e0c6c648b1..6a805ff0ea 100644 --- a/block.c +++ b/block.c @@ -5849,7 +5849,7 @@ int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs) if (!drv) return -ENOMEDIUM; - if (drv->has_variable_length) { + if (bs->bl.has_variable_length) { int ret = bdrv_co_refresh_total_sectors(bs, bs->total_sectors); if (ret < 0) { return ret; diff --git a/block/io.c b/block/io.c index 8974d46941..932aeb5844 100644 --- a/block/io.c +++ b/block/io.c @@ -182,6 +182,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) drv->bdrv_aio_preadv || drv->bdrv_co_preadv_part) ? 1 : 512; + bs->bl.has_variable_length = drv->has_variable_length; + /* Take some limits from the children as a default */ have_limits = false; QLIST_FOREACH(c, &bs->children, next) { @@ -190,6 +192,10 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) bdrv_merge_limits(&bs->bl, &c->bs->bl); have_limits = true; } + + if (c->role & BDRV_CHILD_FILTERED) { + bs->bl.has_variable_length |= c->bs->bl.has_variable_length; + } } if (!have_limits) { diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index d419017328..a6d271f25d 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -855,6 +855,14 @@ typedef struct BlockLimits { /* maximum number of iovec elements */ int max_iov; + + /* + * true if the length of the underlying file can change, and QEMU + * is expected to adjust automatically. Mostly for CD-ROM drives, + * whose length is zero when the tray is empty (they don't need + * an explicit monitor command to load the disk inside the guest). + */ + bool has_variable_length; } BlockLimits; typedef struct BdrvOpBlocker BdrvOpBlocker; From 6188088f72fbfc419ba6cbc129479ee0b96c7436 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 7 Apr 2023 17:32:57 +0200 Subject: [PATCH 04/10] block: remove has_variable_length from filters Filters automatically get has_variable_length from their underlying BlockDriverState. There is no need to mark them as variable-length in the BlockDriver. Signed-off-by: Paolo Bonzini Message-Id: <20230407153303.391121-3-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/copy-on-read.c | 1 - block/filter-compress.c | 1 - block/preallocate.c | 1 - block/replication.c | 1 - 4 files changed, 4 deletions(-) diff --git a/block/copy-on-read.c b/block/copy-on-read.c index cc0f848b0f..b4d6b7efc3 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -259,7 +259,6 @@ static BlockDriver bdrv_copy_on_read = { .bdrv_co_eject = cor_co_eject, .bdrv_co_lock_medium = cor_co_lock_medium, - .has_variable_length = true, .is_filter = true, }; diff --git a/block/filter-compress.c b/block/filter-compress.c index ac285f4b66..320d9576fa 100644 --- a/block/filter-compress.c +++ b/block/filter-compress.c @@ -146,7 +146,6 @@ static BlockDriver bdrv_compress = { .bdrv_co_eject = compress_co_eject, .bdrv_co_lock_medium = compress_co_lock_medium, - .has_variable_length = true, .is_filter = true, }; diff --git a/block/preallocate.c b/block/preallocate.c index 71c3601809..4d82125036 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -558,7 +558,6 @@ BlockDriver bdrv_preallocate_filter = { .bdrv_set_perm = preallocate_set_perm, .bdrv_child_perm = preallocate_child_perm, - .has_variable_length = true, .is_filter = true, }; diff --git a/block/replication.c b/block/replication.c index de01f96184..ea4bf1aa80 100644 --- a/block/replication.c +++ b/block/replication.c @@ -762,7 +762,6 @@ static BlockDriver bdrv_replication = { .is_filter = true, - .has_variable_length = true, .strong_runtime_opts = replication_strong_runtime_opts, }; From 439cc330c58c6973a5c73dab36c334e29607d47b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 7 Apr 2023 17:32:58 +0200 Subject: [PATCH 05/10] block: refresh bs->total_sectors on reopen After reopening a BlockDriverState, it's possible that the size of the underlying file has changed. This for example is covered by test 171. Right now, this is handled by the raw driver's has_variable_length = true setting. Since this will be removed by the next patch, handle it on reopen instead, together with the existing bdrv_refresh_limits. Signed-off-by: Paolo Bonzini Message-Id: <20230407153303.391121-4-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block.c b/block.c index 6a805ff0ea..be7dc5d3e9 100644 --- a/block.c +++ b/block.c @@ -4918,6 +4918,7 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state) qdict_del(bs->options, "backing"); bdrv_refresh_limits(bs, NULL, NULL); + bdrv_refresh_total_sectors(bs, bs->total_sectors); } /* From 8c6f27e7d85a794698eb1cd32c58df28cece50d1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 7 Apr 2023 17:32:59 +0200 Subject: [PATCH 06/10] block: remove has_variable_length from BlockDriver Fill in the field in BlockLimits directly for host devices, and copy it from there for the raw format. Signed-off-by: Paolo Bonzini Message-Id: <20230407153303.391121-5-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/file-posix.c | 12 ++++++++---- block/file-win32.c | 2 +- block/io.c | 2 -- block/raw-format.c | 3 ++- include/block/block_int-common.h | 2 -- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 5760cf22d1..c2dee3f056 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -3743,6 +3743,12 @@ static void cdrom_parse_filename(const char *filename, QDict *options, { bdrv_parse_filename_strip_prefix(filename, "host_cdrom:", options); } + +static void cdrom_refresh_limits(BlockDriverState *bs, Error **errp) +{ + bs->bl.has_variable_length = true; + raw_refresh_limits(bs, errp); +} #endif #ifdef __linux__ @@ -3838,14 +3844,13 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, .bdrv_co_flush_to_disk = raw_co_flush_to_disk, - .bdrv_refresh_limits = raw_refresh_limits, + .bdrv_refresh_limits = cdrom_refresh_limits, .bdrv_co_io_plug = raw_co_io_plug, .bdrv_co_io_unplug = raw_co_io_unplug, .bdrv_attach_aio_context = raw_aio_attach_aio_context, .bdrv_co_truncate = raw_co_truncate, .bdrv_co_getlength = raw_co_getlength, - .has_variable_length = true, .bdrv_co_get_allocated_file_size = raw_co_get_allocated_file_size, /* removable device support */ @@ -3967,14 +3972,13 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, .bdrv_co_flush_to_disk = raw_co_flush_to_disk, - .bdrv_refresh_limits = raw_refresh_limits, + .bdrv_refresh_limits = cdrom_refresh_limits, .bdrv_co_io_plug = raw_co_io_plug, .bdrv_co_io_unplug = raw_co_io_unplug, .bdrv_attach_aio_context = raw_aio_attach_aio_context, .bdrv_co_truncate = raw_co_truncate, .bdrv_co_getlength = raw_co_getlength, - .has_variable_length = true, .bdrv_co_get_allocated_file_size = raw_co_get_allocated_file_size, /* removable device support */ diff --git a/block/file-win32.c b/block/file-win32.c index c7d0b85306..1763b8662e 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -838,6 +838,7 @@ static void hdev_refresh_limits(BlockDriverState *bs, Error **errp) { /* XXX Does Windows support AIO on less than 512-byte alignment? */ bs->bl.request_alignment = 512; + bs->bl.has_variable_length = true; } static int hdev_open(BlockDriverState *bs, QDict *options, int flags, @@ -933,7 +934,6 @@ static BlockDriver bdrv_host_device = { .bdrv_attach_aio_context = raw_attach_aio_context, .bdrv_co_getlength = raw_co_getlength, - .has_variable_length = true, .bdrv_co_get_allocated_file_size = raw_co_get_allocated_file_size, }; diff --git a/block/io.c b/block/io.c index 932aeb5844..2e267a85ab 100644 --- a/block/io.c +++ b/block/io.c @@ -182,8 +182,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) drv->bdrv_aio_preadv || drv->bdrv_co_preadv_part) ? 1 : 512; - bs->bl.has_variable_length = drv->has_variable_length; - /* Take some limits from the children as a default */ have_limits = false; QLIST_FOREACH(c, &bs->children, next) { diff --git a/block/raw-format.c b/block/raw-format.c index 66783ed8e7..06b8030d9d 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -377,6 +377,8 @@ raw_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) static void raw_refresh_limits(BlockDriverState *bs, Error **errp) { + bs->bl.has_variable_length = bs->file->bs->bl.has_variable_length; + if (bs->probed) { /* To make it easier to protect the first sector, any probed * image is restricted to read-modify-write on sub-sector @@ -623,7 +625,6 @@ BlockDriver bdrv_raw = { .bdrv_co_truncate = &raw_co_truncate, .bdrv_co_getlength = &raw_co_getlength, .is_format = true, - .has_variable_length = true, .bdrv_measure = &raw_measure, .bdrv_co_get_info = &raw_co_get_info, .bdrv_refresh_limits = &raw_refresh_limits, diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index a6d271f25d..f01bb8b617 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -158,8 +158,6 @@ struct BlockDriver { */ bool supports_backing; - bool has_variable_length; - /* * Drivers setting this field must be able to work with just a plain * filename with ':' as a prefix, and no other options. From 2c5451ca523fc2b757e1e5b4e0b9fc84dbd58f97 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 7 Apr 2023 17:33:00 +0200 Subject: [PATCH 07/10] migration/block: replace uses of blk_nb_sectors that do not check result Uses of blk_nb_sectors must check whether the result is negative. Otherwise, underflow can happen. Fortunately, alloc_aio_bitmap() and bmds_aio_inflight() both have an alternative way to retrieve the number of sectors in the file. Signed-off-by: Paolo Bonzini Message-Id: <20230407153303.391121-6-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- migration/block.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/migration/block.c b/migration/block.c index 426a25bb19..b2497bbd32 100644 --- a/migration/block.c +++ b/migration/block.c @@ -195,7 +195,7 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector) { int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; - if (sector < blk_nb_sectors(bmds->blk)) { + if (sector < bmds->total_sectors) { return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] & (1UL << (chunk % (sizeof(unsigned long) * 8)))); } else { @@ -229,10 +229,9 @@ static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num, static void alloc_aio_bitmap(BlkMigDevState *bmds) { - BlockBackend *bb = bmds->blk; int64_t bitmap_size; - bitmap_size = blk_nb_sectors(bb) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1; + bitmap_size = bmds->total_sectors + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1; bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8; bmds->aio_bitmap = g_malloc0(bitmap_size); From e5203a3b5db1fb1328f104a4863284198b551ce0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 7 Apr 2023 17:33:01 +0200 Subject: [PATCH 08/10] block-backend: inline bdrv_co_get_geometry bdrv_co_get_geometry is only used in blk_co_get_geometry. Inline it in there, to reduce the number of wrappers for bs->total_sectors. Signed-off-by: Paolo Bonzini Message-Id: <20230407153303.391121-7-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 10 ---------- block/block-backend.c | 8 ++++++-- include/block/block-io.h | 3 --- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/block.c b/block.c index be7dc5d3e9..e8e08ad668 100644 --- a/block.c +++ b/block.c @@ -5879,16 +5879,6 @@ int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs) return ret * BDRV_SECTOR_SIZE; } -/* return 0 as number of sectors if no device present or error */ -void coroutine_fn bdrv_co_get_geometry(BlockDriverState *bs, - uint64_t *nb_sectors_ptr) -{ - int64_t nb_sectors = bdrv_co_nb_sectors(bs); - IO_CODE(); - - *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; -} - bool bdrv_is_sg(BlockDriverState *bs) { IO_CODE(); diff --git a/block/block-backend.c b/block/block-backend.c index 2ee39229e4..36e3a67dff 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1615,16 +1615,20 @@ int64_t coroutine_fn blk_co_getlength(BlockBackend *blk) return bdrv_co_getlength(blk_bs(blk)); } +/* return 0 as number of sectors if no device present or error */ void coroutine_fn blk_co_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) { + BlockDriverState *bs = blk_bs(blk); + IO_CODE(); GRAPH_RDLOCK_GUARD(); - if (!blk_bs(blk)) { + if (!bs) { *nb_sectors_ptr = 0; } else { - bdrv_co_get_geometry(blk_bs(blk), nb_sectors_ptr); + int64_t nb_sectors = bdrv_co_nb_sectors(bs); + *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; } } diff --git a/include/block/block-io.h b/include/block/block-io.h index dbc034b728..9e2248a295 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -90,9 +90,6 @@ int64_t co_wrapper bdrv_get_allocated_file_size(BlockDriverState *bs); BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, BlockDriverState *in_bs, Error **errp); -void coroutine_fn GRAPH_RDLOCK -bdrv_co_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); - int coroutine_fn GRAPH_RDLOCK bdrv_co_delete_file(BlockDriverState *bs, Error **errp); From 9ed98cae151368cc89c4bb77c9f325f7185e8f09 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 7 Apr 2023 17:33:02 +0200 Subject: [PATCH 09/10] block-backend: ignore inserted state in blk_co_nb_sectors All callers of blk_co_nb_sectors (and blk_nb_sectors) are able to handle a non-inserted CD-ROM as a zero-length file, they do not need to raise an error. Not using blk_co_is_available() aligns the function with blk_co_get_geometry(), which becomes a simple wrapper for blk_co_nb_sectors(). It will also make it possible to skip the creation of a coroutine in the (common) case where bs->bl.has_variable_length is false. Signed-off-by: Paolo Bonzini Message-Id: <20230407153303.391121-8-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/block-backend.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 36e3a67dff..cf58d4d1b7 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1615,9 +1615,7 @@ int64_t coroutine_fn blk_co_getlength(BlockBackend *blk) return bdrv_co_getlength(blk_bs(blk)); } -/* return 0 as number of sectors if no device present or error */ -void coroutine_fn blk_co_get_geometry(BlockBackend *blk, - uint64_t *nb_sectors_ptr) +int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); @@ -1625,23 +1623,18 @@ void coroutine_fn blk_co_get_geometry(BlockBackend *blk, GRAPH_RDLOCK_GUARD(); if (!bs) { - *nb_sectors_ptr = 0; + return -ENOMEDIUM; } else { - int64_t nb_sectors = bdrv_co_nb_sectors(bs); - *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; + return bdrv_co_nb_sectors(bs); } } -int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk) +/* return 0 as number of sectors if no device present or error */ +void coroutine_fn blk_co_get_geometry(BlockBackend *blk, + uint64_t *nb_sectors_ptr) { - IO_CODE(); - GRAPH_RDLOCK_GUARD(); - - if (!blk_co_is_available(blk)) { - return -ENOMEDIUM; - } - - return bdrv_co_nb_sectors(blk_bs(blk)); + int64_t ret = blk_co_nb_sectors(blk); + *nb_sectors_ptr = ret < 0 ? 0 : ret; } BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, From 81f730d4d0e8af9c0211c3fedf406df0046341a9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 7 Apr 2023 17:33:03 +0200 Subject: [PATCH 10/10] block, block-backend: write some hot coroutine wrappers by hand The introduction of the graph lock is causing blk_get_geometry, a hot function used in the I/O path, to create a coroutine. However, the only part that really needs to run in coroutine context is the call to bdrv_co_refresh_total_sectors, which in turn only happens in the rare case of host CD-ROM devices. So, write by hand the three wrappers on the path from blk_co_get_geometry to bdrv_co_refresh_total_sectors, so that the coroutine wrapper is only created if bdrv_nb_sectors actually calls bdrv_refresh_total_sectors. Reported-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini Message-Id: <20230407153303.391121-9-pbonzini@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 22 ++++++++++++++++++++++ block/block-backend.c | 27 +++++++++++++++++++++++++++ include/block/block-io.h | 2 +- include/sysemu/block-backend-io.h | 5 ++--- 4 files changed, 52 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index e8e08ad668..d79a52ca74 100644 --- a/block.c +++ b/block.c @@ -5859,6 +5859,28 @@ int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs) return bs->total_sectors; } +/* + * This wrapper is written by hand because this function is in the hot I/O path, + * via blk_get_geometry. + */ +int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + IO_CODE(); + + if (!drv) + return -ENOMEDIUM; + + if (bs->bl.has_variable_length) { + int ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); + if (ret < 0) { + return ret; + } + } + + return bs->total_sectors; +} + /** * Return length in bytes on success, -errno on error. * The length is always a multiple of BDRV_SECTOR_SIZE. diff --git a/block/block-backend.c b/block/block-backend.c index cf58d4d1b7..55efc735b4 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1629,6 +1629,23 @@ int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk) } } +/* + * This wrapper is written by hand because this function is in the hot I/O path, + * via blk_get_geometry. + */ +int64_t coroutine_mixed_fn blk_nb_sectors(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + + IO_CODE(); + + if (!bs) { + return -ENOMEDIUM; + } else { + return bdrv_nb_sectors(bs); + } +} + /* return 0 as number of sectors if no device present or error */ void coroutine_fn blk_co_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) @@ -1637,6 +1654,16 @@ void coroutine_fn blk_co_get_geometry(BlockBackend *blk, *nb_sectors_ptr = ret < 0 ? 0 : ret; } +/* + * This wrapper is written by hand because this function is in the hot I/O path. + */ +void coroutine_mixed_fn blk_get_geometry(BlockBackend *blk, + uint64_t *nb_sectors_ptr) +{ + int64_t ret = blk_nb_sectors(blk); + *nb_sectors_ptr = ret < 0 ? 0 : ret; +} + BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) diff --git a/include/block/block-io.h b/include/block/block-io.h index 9e2248a295..5dab88521d 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -79,7 +79,7 @@ bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_nb_sectors(BlockDriverState *bs); -int64_t co_wrapper_mixed_bdrv_rdlock bdrv_nb_sectors(BlockDriverState *bs); +int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs); int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_getlength(BlockDriverState *bs); int64_t co_wrapper_mixed_bdrv_rdlock bdrv_getlength(BlockDriverState *bs); diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h index c672b77247..bb25493ba1 100644 --- a/include/sysemu/block-backend-io.h +++ b/include/sysemu/block-backend-io.h @@ -72,11 +72,10 @@ int64_t co_wrapper_mixed blk_getlength(BlockBackend *blk); void coroutine_fn blk_co_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); -void co_wrapper_mixed blk_get_geometry(BlockBackend *blk, - uint64_t *nb_sectors_ptr); +void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk); -int64_t co_wrapper_mixed blk_nb_sectors(BlockBackend *blk); +int64_t blk_nb_sectors(BlockBackend *blk); void *blk_try_blockalign(BlockBackend *blk, size_t size); void *blk_blockalign(BlockBackend *blk, size_t size);