From 17e9aa3f220df64fc288a175dbd62324e92d850c Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 26 Jun 2018 10:05:45 +0200 Subject: [PATCH 01/29] block-qdict: Pacify Coverity after commit f1b34a248e9 Commit f1b34a248e9 replaced less-than-obvious test in qdict_flatten_qdict() by the obvious one. Sadly, it made something else non-obvious: the fact that @new_key passed to qdict_put_obj() can't be null, because that depends on the function's precondition (target == qdict) == !prefix. Tweak the function some more to help Coverity and human readers alike. Fixes: CID 1393620 Signed-off-by: Markus Armbruster Signed-off-by: Kevin Wolf --- qobject/block-qdict.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/qobject/block-qdict.c b/qobject/block-qdict.c index 36129e7379..80c653013f 100644 --- a/qobject/block-qdict.c +++ b/qobject/block-qdict.c @@ -97,7 +97,7 @@ static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) const QDictEntry *entry, *next; QDict *dict_val; QList *list_val; - char *new_key; + char *key, *new_key; entry = qdict_first(qdict); @@ -106,10 +106,12 @@ static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) value = qdict_entry_value(entry); dict_val = qobject_to(QDict, value); list_val = qobject_to(QList, value); - new_key = NULL; if (prefix) { - new_key = g_strdup_printf("%s.%s", prefix, entry->key); + key = new_key = g_strdup_printf("%s.%s", prefix, entry->key); + } else { + key = entry->key; + new_key = NULL; } /* @@ -125,19 +127,17 @@ static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) * well advised not to modify them altogether.) */ if (dict_val && qdict_size(dict_val)) { - qdict_flatten_qdict(dict_val, target, - new_key ? new_key : entry->key); + qdict_flatten_qdict(dict_val, target, key); if (target == qdict) { qdict_del(qdict, entry->key); } } else if (list_val && !qlist_empty(list_val)) { - qdict_flatten_qlist(list_val, target, - new_key ? new_key : entry->key); + qdict_flatten_qlist(list_val, target, key); if (target == qdict) { qdict_del(qdict, entry->key); } } else if (target != qdict) { - qdict_put_obj(target, new_key, qobject_ref(value)); + qdict_put_obj(target, key, qobject_ref(value)); } g_free(new_key); From e6af90f3fb7b87f18f46f6588fbf3e781d6f4ef6 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 26 Jun 2018 10:05:46 +0200 Subject: [PATCH 02/29] block/crypto: Pacify Coverity after commit f853465aacb Coverity can't see that qobject_input_visitor_new_flat_confused() returns non-null when it doesn't set @local_err. Check the return value instead, like all the other callers do. Fixes: CID 1393615 Fixes: CID 1393616 Signed-off-by: Markus Armbruster Signed-off-by: Kevin Wolf --- block/crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index 82091c5f70..aaa8fb7530 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -160,7 +160,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format, ret->format = format; v = qobject_input_visitor_new_flat_confused(opts, &local_err); - if (local_err) { + if (!v) { goto out; } @@ -214,7 +214,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format, ret->format = format; v = qobject_input_visitor_new_flat_confused(opts, &local_err); - if (local_err) { + if (!v) { goto out; } From 0ab1c41d1cdf50c90ff976d29ac943e727334668 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 26 Jun 2018 10:37:02 +0200 Subject: [PATCH 03/29] qapi/job: The next release will be 3.0 Commit 51f63ec7d tried to change all references to 2.13 into 3.0, but it failed to achieve this because it was not properly rebased on top of the series introducing qapi/job.json. Change the references now. Signed-off-by: Kevin Wolf Reviewed-by: Markus Armbruster Reviewed-by: Eric Blake --- qapi/job.json | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/qapi/job.json b/qapi/job.json index 9d074eb8d2..a121b615fb 100644 --- a/qapi/job.json +++ b/qapi/job.json @@ -104,7 +104,7 @@ # @id: The job identifier # @status: The new job status # -# Since: 2.13 +# Since: 3.0 ## { 'event': 'JOB_STATUS_CHANGE', 'data': { 'id': 'str', @@ -126,7 +126,7 @@ # # @id: The job identifier. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-pause', 'data': { 'id': 'str' } } @@ -140,7 +140,7 @@ # # @id : The job identifier. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-resume', 'data': { 'id': 'str' } } @@ -159,7 +159,7 @@ # # @id: The job identifier. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-cancel', 'data': { 'id': 'str' } } @@ -171,7 +171,7 @@ # # @id: The job identifier. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-complete', 'data': { 'id': 'str' } } @@ -187,7 +187,7 @@ # # @id: The job identifier. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-dismiss', 'data': { 'id': 'str' } } @@ -205,7 +205,7 @@ # @id: The identifier of any job in the transaction, or of a job that is not # part of any transaction. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-finalize', 'data': { 'id': 'str' } } @@ -237,7 +237,7 @@ # the reason for the job failure. It should not be parsed # by applications. # -# Since: 2.13 +# Since: 3.0 ## { 'struct': 'JobInfo', 'data': { 'id': 'str', 'type': 'JobType', 'status': 'JobStatus', @@ -251,6 +251,6 @@ # # Returns: a list with a @JobInfo for each active job # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'query-jobs', 'returns': ['JobInfo'] } From b8efb36b9e99dbea7370139c0866b97a933f78d4 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 25 Jun 2018 18:39:00 +0200 Subject: [PATCH 04/29] usb-storage: Add rerror/werror properties The error handling policy was traditionally set with -drive, but with -blockdev it is no longer possible to set frontend options. scsi-disk (and other block devices) have long supported qdev properties to configure the error handling policy, so let's add these options to usb-storage as well and just forward them to the internal scsi-disk instance. Signed-off-by: Kevin Wolf Reviewed-by: Markus Armbruster --- hw/scsi/scsi-bus.c | 11 ++++++++++- hw/usb/dev-storage.c | 2 ++ include/hw/scsi/scsi.h | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 9646743a7d..5905f6bf29 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -226,6 +226,8 @@ static void scsi_qdev_unrealize(DeviceState *qdev, Error **errp) SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, int unit, bool removable, int bootindex, bool share_rw, + BlockdevOnError rerror, + BlockdevOnError werror, const char *serial, Error **errp) { const char *driver; @@ -262,6 +264,10 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, object_unparent(OBJECT(dev)); return NULL; } + + qdev_prop_set_enum(dev, "rerror", rerror); + qdev_prop_set_enum(dev, "werror", werror); + object_property_set_bool(OBJECT(dev), true, "realized", &err); if (err != NULL) { error_propagate(errp, err); @@ -285,7 +291,10 @@ void scsi_bus_legacy_handle_cmdline(SCSIBus *bus) } qemu_opts_loc_restore(dinfo->opts); scsi_bus_legacy_add_drive(bus, blk_by_legacy_dinfo(dinfo), - unit, false, -1, false, NULL, &error_fatal); + unit, false, -1, false, + BLOCKDEV_ON_ERROR_AUTO, + BLOCKDEV_ON_ERROR_AUTO, + NULL, &error_fatal); } loc_pop(&loc); } diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index c99398b7f6..cd5551d94f 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -625,6 +625,7 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp) &usb_msd_scsi_info_storage, NULL); scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable, s->conf.bootindex, s->conf.share_rw, + s->conf.rerror, s->conf.werror, dev->serial, errp); blk_unref(blk); @@ -671,6 +672,7 @@ static const VMStateDescription vmstate_usb_msd = { static Property msd_properties[] = { DEFINE_BLOCK_PROPERTIES(MSDState, conf), + DEFINE_BLOCK_ERROR_PROPERTIES(MSDState, conf), DEFINE_PROP_BIT("removable", MSDState, removable, 0, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index e35137ea78..1a7290d563 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -154,6 +154,8 @@ static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d) SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, int unit, bool removable, int bootindex, bool share_rw, + BlockdevOnError rerror, + BlockdevOnError werror, const char *serial, Error **errp); void scsi_bus_legacy_handle_cmdline(SCSIBus *bus); void scsi_legacy_handle_cmdline(void); From 7c8952697e9c44931090251e142c1d3108c22be4 Mon Sep 17 00:00:00 2001 From: Weiping Zhang Date: Tue, 26 Jun 2018 09:44:56 +0800 Subject: [PATCH 05/29] hw/block/nvme: add optional parameter num_queues for nvme device Add an optional paramter num_queues for device, and set it to 64 by default. Signed-off-by: Weiping Zhang Acked-by: Keith Busch Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index d5bf95b79b..156ecf3c41 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -18,7 +18,8 @@ * Usage: add options: * -drive file=,if=none,id= * -device nvme,drive=,serial=,id=, \ - * cmb_size_mb= + * cmb_size_mb=, \ + * num_queues= * * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. @@ -1232,7 +1233,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) pcie_endpoint_cap_init(&n->parent_obj, 0x80); n->num_namespaces = 1; - n->num_queues = 64; n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); n->ns_size = bs_size / (uint64_t)n->num_namespaces; @@ -1341,6 +1341,7 @@ static Property nvme_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), DEFINE_PROP_STRING("serial", NvmeCtrl, serial), DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), + DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64), DEFINE_PROP_END_OF_LIST(), }; From ae5475e82fd1ebb24f4f77cf28f59ca6548c6136 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 26 Jun 2018 13:22:45 +0200 Subject: [PATCH 06/29] qcow2: Fix qcow2_truncate() error return value If qcow2_alloc_clusters_at() returns an error, we do need to negate it to get back the positive errno code for error_setg_errno(), but we still need to return the negative error code. Fixes: 772d1f973f87269f6a4a4ea4b880680f3779bbdf Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- block/qcow2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index a3a3aa2a97..6b2e1e1058 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3597,7 +3597,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (clusters_allocated < 0) { error_setg_errno(errp, -clusters_allocated, "Failed to allocate data clusters"); - return -clusters_allocated; + return clusters_allocated; } assert(clusters_allocated == nb_new_data_clusters); From 061ca8a368165fae300748c17971824a089f521f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 21 Jun 2018 17:54:35 +0200 Subject: [PATCH 07/29] block: Convert .bdrv_truncate callback to coroutine_fn bdrv_truncate() is an operation that can block (even for a quite long time, depending on the PreallocMode) in I/O paths that shouldn't block. Convert it to a coroutine_fn so that we have the infrastructure for drivers to make their .bdrv_co_truncate implementation asynchronous. This change could potentially introduce new race conditions because bdrv_truncate() isn't necessarily executed atomically any more. Whether this is a problem needs to be evaluated for each block driver that supports truncate: * file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The protocol drivers are trivially safe because they don't actually yield yet, so there is no change in behaviour. * copy-on-read, crypto, raw-format: Essentially just filter drivers that pass the request to a child node, no problem. * qcow2: The implementation modifies metadata, so it needs to hold s->lock to be safe with concurrent I/O requests. In order to avoid double locking, this requires pulling the locking out into preallocate_co() and using qcow2_write_caches() instead of bdrv_flush(). * qed: Does a single header update, this is fine without locking. Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- block.c | 63 ++++++++++++++++++++++++++++----- block/copy-on-read.c | 8 ++--- block/crypto.c | 9 ++--- block/file-posix.c | 12 +++---- block/file-win32.c | 6 ++-- block/gluster.c | 14 ++++---- block/iscsi.c | 8 ++--- block/nfs.c | 7 ++-- block/qcow2.c | 74 +++++++++++++++++++++++---------------- block/qed.c | 8 +++-- block/raw-format.c | 8 ++--- block/rbd.c | 8 +++-- block/sheepdog.c | 12 +++---- block/ssh.c | 6 ++-- include/block/block.h | 4 +++ include/block/block_int.h | 4 +-- 16 files changed, 162 insertions(+), 89 deletions(-) diff --git a/block.c b/block.c index 1b8147c1b3..f761bc85d5 100644 --- a/block.c +++ b/block.c @@ -3788,8 +3788,8 @@ exit: /** * Truncate file to 'offset' bytes (needed only for file protocols) */ -int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, - Error **errp) +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, + PreallocMode prealloc, Error **errp) { BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; @@ -3807,23 +3807,28 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, return -EINVAL; } - if (!drv->bdrv_truncate) { + bdrv_inc_in_flight(bs); + + if (!drv->bdrv_co_truncate) { if (bs->file && drv->is_filter) { - return bdrv_truncate(bs->file, offset, prealloc, errp); + ret = bdrv_co_truncate(bs->file, offset, prealloc, errp); + goto out; } error_setg(errp, "Image format driver does not support resize"); - return -ENOTSUP; + ret = -ENOTSUP; + goto out; } if (bs->read_only) { error_setg(errp, "Image is read-only"); - return -EACCES; + ret = -EACCES; + goto out; } assert(!(bs->open_flags & BDRV_O_INACTIVE)); - ret = drv->bdrv_truncate(bs, offset, prealloc, errp); + ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp); if (ret < 0) { - return ret; + goto out; } ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); if (ret < 0) { @@ -3834,9 +3839,51 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, bdrv_dirty_bitmap_truncate(bs, offset); bdrv_parent_cb_resize(bs); atomic_inc(&bs->write_gen); + +out: + bdrv_dec_in_flight(bs); return ret; } +typedef struct TruncateCo { + BdrvChild *child; + int64_t offset; + PreallocMode prealloc; + Error **errp; + int ret; +} TruncateCo; + +static void coroutine_fn bdrv_truncate_co_entry(void *opaque) +{ + TruncateCo *tco = opaque; + tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc, + tco->errp); +} + +int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, + Error **errp) +{ + Coroutine *co; + TruncateCo tco = { + .child = child, + .offset = offset, + .prealloc = prealloc, + .errp = errp, + .ret = NOT_DONE, + }; + + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_truncate_co_entry(&tco); + } else { + co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco); + qemu_coroutine_enter(co); + BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE); + } + + return tco.ret; +} + /** * Length of a allocated file in bytes. Sparse files are counted by actual * allocated space. Return < 0 if error or unknown. diff --git a/block/copy-on-read.c b/block/copy-on-read.c index 6a97208888..1dcdaeed69 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -80,10 +80,10 @@ static int64_t cor_getlength(BlockDriverState *bs) } -static int cor_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn cor_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { - return bdrv_truncate(bs->file, offset, prealloc, errp); + return bdrv_co_truncate(bs->file, offset, prealloc, errp); } @@ -147,7 +147,7 @@ BlockDriver bdrv_copy_on_read = { .bdrv_child_perm = cor_child_perm, .bdrv_getlength = cor_getlength, - .bdrv_truncate = cor_truncate, + .bdrv_co_truncate = cor_co_truncate, .bdrv_co_preadv = cor_co_preadv, .bdrv_co_pwritev = cor_co_pwritev, diff --git a/block/crypto.c b/block/crypto.c index aaa8fb7530..210c80d5c7 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -357,8 +357,9 @@ static int block_crypto_co_create_generic(BlockDriverState *bs, return ret; } -static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn +block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BlockCrypto *crypto = bs->opaque; uint64_t payload_offset = @@ -371,7 +372,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, offset += payload_offset; - return bdrv_truncate(bs->file, offset, prealloc, errp); + return bdrv_co_truncate(bs->file, offset, prealloc, errp); } static void block_crypto_close(BlockDriverState *bs) @@ -700,7 +701,7 @@ BlockDriver bdrv_crypto_luks = { .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_create = block_crypto_co_create_luks, .bdrv_co_create_opts = block_crypto_co_create_opts_luks, - .bdrv_truncate = block_crypto_truncate, + .bdrv_co_truncate = block_crypto_co_truncate, .create_opts = &block_crypto_create_opts_luks, .bdrv_reopen_prepare = block_crypto_reopen_prepare, diff --git a/block/file-posix.c b/block/file-posix.c index 43b963b13e..53c5833659 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1878,8 +1878,8 @@ out: return result; } -static int raw_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVRawState *s = bs->opaque; struct stat st; @@ -2625,7 +2625,7 @@ BlockDriver bdrv_file = { .bdrv_io_unplug = raw_aio_unplug, .bdrv_attach_aio_context = raw_aio_attach_aio_context, - .bdrv_truncate = raw_truncate, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size @@ -3105,7 +3105,7 @@ static BlockDriver bdrv_host_device = { .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, - .bdrv_truncate = raw_truncate, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size @@ -3227,7 +3227,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, - .bdrv_truncate = raw_truncate, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .has_variable_length = true, .bdrv_get_allocated_file_size @@ -3357,7 +3357,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, - .bdrv_truncate = raw_truncate, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .has_variable_length = true, .bdrv_get_allocated_file_size diff --git a/block/file-win32.c b/block/file-win32.c index 3c67db4336..0411fe80fd 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -467,8 +467,8 @@ static void raw_close(BlockDriverState *bs) } } -static int raw_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVRawState *s = bs->opaque; LONG low, high; @@ -640,7 +640,7 @@ BlockDriver bdrv_file = { .bdrv_aio_pwritev = raw_aio_pwritev, .bdrv_aio_flush = raw_aio_flush, - .bdrv_truncate = raw_truncate, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, diff --git a/block/gluster.c b/block/gluster.c index b5fe7f3e87..a4e1c8ecd8 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1177,8 +1177,10 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, return acb.ret; } -static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs, + int64_t offset, + PreallocMode prealloc, + Error **errp) { BDRVGlusterState *s = bs->opaque; return qemu_gluster_do_truncate(s->fd, offset, prealloc, errp); @@ -1499,7 +1501,7 @@ static BlockDriver bdrv_gluster = { .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, - .bdrv_truncate = qemu_gluster_truncate, + .bdrv_co_truncate = qemu_gluster_co_truncate, .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, @@ -1528,7 +1530,7 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, - .bdrv_truncate = qemu_gluster_truncate, + .bdrv_co_truncate = qemu_gluster_co_truncate, .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, @@ -1557,7 +1559,7 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, - .bdrv_truncate = qemu_gluster_truncate, + .bdrv_co_truncate = qemu_gluster_co_truncate, .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, @@ -1592,7 +1594,7 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, - .bdrv_truncate = qemu_gluster_truncate, + .bdrv_co_truncate = qemu_gluster_co_truncate, .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, diff --git a/block/iscsi.c b/block/iscsi.c index 9f00fb47a5..bc84b14e20 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -2085,8 +2085,8 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state) } } -static int iscsi_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { IscsiLun *iscsilun = bs->opaque; Error *local_err = NULL; @@ -2431,7 +2431,7 @@ static BlockDriver bdrv_iscsi = { .bdrv_getlength = iscsi_getlength, .bdrv_get_info = iscsi_get_info, - .bdrv_truncate = iscsi_truncate, + .bdrv_co_truncate = iscsi_co_truncate, .bdrv_refresh_limits = iscsi_refresh_limits, .bdrv_co_block_status = iscsi_co_block_status, @@ -2468,7 +2468,7 @@ static BlockDriver bdrv_iser = { .bdrv_getlength = iscsi_getlength, .bdrv_get_info = iscsi_get_info, - .bdrv_truncate = iscsi_truncate, + .bdrv_co_truncate = iscsi_co_truncate, .bdrv_refresh_limits = iscsi_refresh_limits, .bdrv_co_block_status = iscsi_co_block_status, diff --git a/block/nfs.c b/block/nfs.c index 743ca0450e..eab1a2c408 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -743,8 +743,9 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) return (task.ret < 0 ? task.ret : st.st_blocks * 512); } -static int nfs_file_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn +nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { NFSClient *client = bs->opaque; int ret; @@ -873,7 +874,7 @@ static BlockDriver bdrv_nfs = { .bdrv_has_zero_init = nfs_has_zero_init, .bdrv_get_allocated_file_size = nfs_get_allocated_file_size, - .bdrv_truncate = nfs_file_truncate, + .bdrv_co_truncate = nfs_file_co_truncate, .bdrv_file_open = nfs_file_open, .bdrv_close = nfs_file_close, diff --git a/block/qcow2.c b/block/qcow2.c index 6b2e1e1058..9e0ccbbfaf 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2543,15 +2543,12 @@ static void coroutine_fn preallocate_co(void *opaque) BlockDriverState *bs = params->bs; uint64_t offset = params->offset; uint64_t new_length = params->new_length; - BDRVQcow2State *s = bs->opaque; uint64_t bytes; uint64_t host_offset = 0; unsigned int cur_bytes; int ret; QCowL2Meta *meta; - qemu_co_mutex_lock(&s->lock); - assert(offset <= new_length); bytes = new_length - offset; @@ -2604,7 +2601,6 @@ static void coroutine_fn preallocate_co(void *opaque) ret = 0; done: - qemu_co_mutex_unlock(&s->lock); params->ret = ret; } @@ -3041,7 +3037,11 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) /* And if we're supposed to preallocate metadata, do that now */ if (qcow2_opts->preallocation != PREALLOC_MODE_OFF) { + BDRVQcow2State *s = blk_bs(blk)->opaque; + qemu_co_mutex_lock(&s->lock); ret = preallocate(blk_bs(blk), 0, qcow2_opts->size); + qemu_co_mutex_unlock(&s->lock); + if (ret < 0) { error_setg_errno(errp, -ret, "Could not preallocate metadata"); goto out; @@ -3437,8 +3437,8 @@ fail: return ret; } -static int qcow2_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVQcow2State *s = bs->opaque; uint64_t old_length; @@ -3458,17 +3458,21 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, return -EINVAL; } + qemu_co_mutex_lock(&s->lock); + /* cannot proceed if image has snapshots */ if (s->nb_snapshots) { error_setg(errp, "Can't resize an image which has snapshots"); - return -ENOTSUP; + ret = -ENOTSUP; + goto fail; } /* cannot proceed if image has bitmaps */ if (s->nb_bitmaps) { /* TODO: resize bitmaps in the image */ error_setg(errp, "Can't resize an image which has bitmaps"); - return -ENOTSUP; + ret = -ENOTSUP; + goto fail; } old_length = bs->total_sectors * 512; @@ -3479,7 +3483,8 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (prealloc != PREALLOC_MODE_OFF) { error_setg(errp, "Preallocation can't be used for shrinking an image"); - return -EINVAL; + ret = -EINVAL; + goto fail; } ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), @@ -3488,40 +3493,42 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, QCOW2_DISCARD_ALWAYS, true); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); - return ret; + goto fail; } ret = qcow2_shrink_l1_table(bs, new_l1_size); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to reduce the number of L2 tables"); - return ret; + goto fail; } ret = qcow2_shrink_reftable(bs); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to discard unused refblocks"); - return ret; + goto fail; } old_file_size = bdrv_getlength(bs->file->bs); if (old_file_size < 0) { error_setg_errno(errp, -old_file_size, "Failed to inquire current file length"); - return old_file_size; + ret = old_file_size; + goto fail; } last_cluster = qcow2_get_last_cluster(bs, old_file_size); if (last_cluster < 0) { error_setg_errno(errp, -last_cluster, "Failed to find the last cluster"); - return last_cluster; + ret = last_cluster; + goto fail; } if ((last_cluster + 1) * s->cluster_size < old_file_size) { Error *local_err = NULL; - bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size, - PREALLOC_MODE_OFF, &local_err); + bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, + PREALLOC_MODE_OFF, &local_err); if (local_err) { warn_reportf_err(local_err, "Failed to truncate the tail of the image: "); @@ -3531,7 +3538,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, ret = qcow2_grow_l1_table(bs, new_l1_size, true); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to grow the L1 table"); - return ret; + goto fail; } } @@ -3543,7 +3550,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, ret = preallocate(bs, old_length, offset); if (ret < 0) { error_setg_errno(errp, -ret, "Preallocation failed"); - return ret; + goto fail; } break; @@ -3559,7 +3566,8 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (old_file_size < 0) { error_setg_errno(errp, -old_file_size, "Failed to inquire current file length"); - return old_file_size; + ret = old_file_size; + goto fail; } old_file_size = ROUND_UP(old_file_size, s->cluster_size); @@ -3589,7 +3597,8 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (allocation_start < 0) { error_setg_errno(errp, -allocation_start, "Failed to resize refcount structures"); - return allocation_start; + ret = allocation_start; + goto fail; } clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, @@ -3597,7 +3606,8 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (clusters_allocated < 0) { error_setg_errno(errp, -clusters_allocated, "Failed to allocate data clusters"); - return clusters_allocated; + ret = clusters_allocated; + goto fail; } assert(clusters_allocated == nb_new_data_clusters); @@ -3605,13 +3615,13 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, /* Allocate the data area */ new_file_size = allocation_start + nb_new_data_clusters * s->cluster_size; - ret = bdrv_truncate(bs->file, new_file_size, prealloc, errp); + ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp); if (ret < 0) { error_prepend(errp, "Failed to resize underlying file: "); qcow2_free_clusters(bs, allocation_start, nb_new_data_clusters * s->cluster_size, QCOW2_DISCARD_OTHER); - return ret; + goto fail; } /* Create the necessary L2 entries */ @@ -3634,7 +3644,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, qcow2_free_clusters(bs, host_offset, nb_new_data_clusters * s->cluster_size, QCOW2_DISCARD_OTHER); - return ret; + goto fail; } guest_offset += nb_clusters * s->cluster_size; @@ -3650,11 +3660,11 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (prealloc != PREALLOC_MODE_OFF) { /* Flush metadata before actually changing the image size */ - ret = bdrv_flush(bs); + ret = qcow2_write_caches(bs); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to flush the preallocated area to disk"); - return ret; + goto fail; } } @@ -3664,11 +3674,14 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, &offset, sizeof(uint64_t)); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to update the image size"); - return ret; + goto fail; } s->l1_vm_state_index = new_l1_size; - return 0; + ret = 0; +fail: + qemu_co_mutex_unlock(&s->lock); + return ret; } /* XXX: put compressed sectors first, then all the cluster aligned @@ -3692,7 +3705,8 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, if (cluster_offset < 0) { return cluster_offset; } - return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL); + return bdrv_co_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, + NULL); } if (offset_into_cluster(s, offset)) { @@ -4696,7 +4710,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_co_pdiscard = qcow2_co_pdiscard, .bdrv_co_copy_range_from = qcow2_co_copy_range_from, .bdrv_co_copy_range_to = qcow2_co_copy_range_to, - .bdrv_truncate = qcow2_truncate, + .bdrv_co_truncate = qcow2_co_truncate, .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, .bdrv_make_empty = qcow2_make_empty, diff --git a/block/qed.c b/block/qed.c index 2363814538..689ea9d4d5 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1467,8 +1467,10 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, QED_AIOCB_WRITE | QED_AIOCB_ZERO); } -static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs, + int64_t offset, + PreallocMode prealloc, + Error **errp) { BDRVQEDState *s = bs->opaque; uint64_t old_image_size; @@ -1678,7 +1680,7 @@ static BlockDriver bdrv_qed = { .bdrv_co_readv = bdrv_qed_co_readv, .bdrv_co_writev = bdrv_qed_co_writev, .bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes, - .bdrv_truncate = bdrv_qed_truncate, + .bdrv_co_truncate = bdrv_qed_co_truncate, .bdrv_getlength = bdrv_qed_getlength, .bdrv_get_info = bdrv_qed_get_info, .bdrv_refresh_limits = bdrv_qed_refresh_limits, diff --git a/block/raw-format.c b/block/raw-format.c index f2e468df6f..b78da564d4 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -366,8 +366,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) } } -static int raw_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVRawState *s = bs->opaque; @@ -383,7 +383,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, s->size = offset; offset += s->offset; - return bdrv_truncate(bs->file, offset, prealloc, errp); + return bdrv_co_truncate(bs->file, offset, prealloc, errp); } static void raw_eject(BlockDriverState *bs, bool eject_flag) @@ -545,7 +545,7 @@ BlockDriver bdrv_raw = { .bdrv_co_block_status = &raw_co_block_status, .bdrv_co_copy_range_from = &raw_co_copy_range_from, .bdrv_co_copy_range_to = &raw_co_copy_range_to, - .bdrv_truncate = &raw_truncate, + .bdrv_co_truncate = &raw_co_truncate, .bdrv_getlength = &raw_getlength, .has_variable_length = true, .bdrv_measure = &raw_measure, diff --git a/block/rbd.c b/block/rbd.c index f2c6965418..ca8e5bbace 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -990,8 +990,10 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs) return info.size; } -static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, + int64_t offset, + PreallocMode prealloc, + Error **errp) { BDRVRBDState *s = bs->opaque; int r; @@ -1184,7 +1186,7 @@ static BlockDriver bdrv_rbd = { .bdrv_get_info = qemu_rbd_getinfo, .create_opts = &qemu_rbd_create_opts, .bdrv_getlength = qemu_rbd_getlength, - .bdrv_truncate = qemu_rbd_truncate, + .bdrv_co_truncate = qemu_rbd_co_truncate, .protocol_name = "rbd", .bdrv_aio_preadv = qemu_rbd_aio_preadv, diff --git a/block/sheepdog.c b/block/sheepdog.c index 665b1763eb..b229a664d9 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2292,8 +2292,8 @@ static int64_t sd_getlength(BlockDriverState *bs) return s->inode.vdi_size; } -static int sd_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVSheepdogState *s = bs->opaque; int ret, fd; @@ -2609,7 +2609,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, assert(!flags); if (offset > s->inode.vdi_size) { - ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL); + ret = sd_co_truncate(bs, offset, PREALLOC_MODE_OFF, NULL); if (ret < 0) { return ret; } @@ -3231,7 +3231,7 @@ static BlockDriver bdrv_sheepdog = { .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_truncate = sd_truncate, + .bdrv_co_truncate = sd_co_truncate, .bdrv_co_readv = sd_co_readv, .bdrv_co_writev = sd_co_writev, @@ -3268,7 +3268,7 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_truncate = sd_truncate, + .bdrv_co_truncate = sd_co_truncate, .bdrv_co_readv = sd_co_readv, .bdrv_co_writev = sd_co_writev, @@ -3305,7 +3305,7 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_truncate = sd_truncate, + .bdrv_co_truncate = sd_co_truncate, .bdrv_co_readv = sd_co_readv, .bdrv_co_writev = sd_co_writev, diff --git a/block/ssh.c b/block/ssh.c index da7bbf73e2..7fbc27abdf 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -1243,8 +1243,8 @@ static int64_t ssh_getlength(BlockDriverState *bs) return length; } -static int ssh_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVSSHState *s = bs->opaque; @@ -1279,7 +1279,7 @@ static BlockDriver bdrv_ssh = { .bdrv_co_readv = ssh_co_readv, .bdrv_co_writev = ssh_co_writev, .bdrv_getlength = ssh_getlength, - .bdrv_truncate = ssh_truncate, + .bdrv_co_truncate = ssh_co_truncate, .bdrv_co_flush_to_disk = ssh_co_flush, .create_opts = &ssh_create_opts, }; diff --git a/include/block/block.h b/include/block/block.h index b1d6fdb97a..42e59ff585 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -300,8 +300,12 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file); void bdrv_refresh_filename(BlockDriverState *bs); + +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, + PreallocMode prealloc, Error **errp); int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, Error **errp); + int64_t bdrv_nb_sectors(BlockDriverState *bs); int64_t bdrv_getlength(BlockDriverState *bs); int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 74646ed722..c653ee663a 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -289,8 +289,8 @@ struct BlockDriver { * bdrv_parse_filename. */ const char *protocol_name; - int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp); + int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp); int64_t (*bdrv_getlength)(BlockDriverState *bs); bool has_variable_length; From 47e86b868d57ffe13162ca44e5f6a51c15c4a769 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 26 Jun 2018 15:52:13 +0200 Subject: [PATCH 08/29] qcow2: Remove coroutine trampoline for preallocate_co() All callers are coroutine_fns now, so we can just directly call preallocate_co(). Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- block/qcow2.c | 51 ++++++++------------------------------------------- 1 file changed, 8 insertions(+), 43 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 9e0ccbbfaf..4a0d92860d 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2521,15 +2521,6 @@ static int qcow2_set_up_encryption(BlockDriverState *bs, return ret; } - -typedef struct PreallocCo { - BlockDriverState *bs; - uint64_t offset; - uint64_t new_length; - - int ret; -} PreallocCo; - /** * Preallocates metadata structures for data clusters between @offset (in the * guest disk) and @new_length (which is thus generally the new guest disk @@ -2537,12 +2528,9 @@ typedef struct PreallocCo { * * Returns: 0 on success, -errno on failure. */ -static void coroutine_fn preallocate_co(void *opaque) +static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, + uint64_t new_length) { - PreallocCo *params = opaque; - BlockDriverState *bs = params->bs; - uint64_t offset = params->offset; - uint64_t new_length = params->new_length; uint64_t bytes; uint64_t host_offset = 0; unsigned int cur_bytes; @@ -2557,7 +2545,7 @@ static void coroutine_fn preallocate_co(void *opaque) ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, &host_offset, &meta); if (ret < 0) { - goto done; + return ret; } while (meta) { @@ -2567,7 +2555,7 @@ static void coroutine_fn preallocate_co(void *opaque) if (ret < 0) { qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters, QCOW2_DISCARD_NEVER); - goto done; + return ret; } /* There are no dependent requests, but we need to remove our @@ -2594,34 +2582,11 @@ static void coroutine_fn preallocate_co(void *opaque) ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1, &data, 1); if (ret < 0) { - goto done; + return ret; } } - ret = 0; - -done: - params->ret = ret; -} - -static int preallocate(BlockDriverState *bs, - uint64_t offset, uint64_t new_length) -{ - PreallocCo params = { - .bs = bs, - .offset = offset, - .new_length = new_length, - .ret = -EINPROGRESS, - }; - - if (qemu_in_coroutine()) { - preallocate_co(¶ms); - } else { - Coroutine *co = qemu_coroutine_create(preallocate_co, ¶ms); - bdrv_coroutine_enter(bs, co); - BDRV_POLL_WHILE(bs, params.ret == -EINPROGRESS); - } - return params.ret; + return 0; } /* qcow2_refcount_metadata_size: @@ -3039,7 +3004,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) if (qcow2_opts->preallocation != PREALLOC_MODE_OFF) { BDRVQcow2State *s = blk_bs(blk)->opaque; qemu_co_mutex_lock(&s->lock); - ret = preallocate(blk_bs(blk), 0, qcow2_opts->size); + ret = preallocate_co(blk_bs(blk), 0, qcow2_opts->size); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { @@ -3547,7 +3512,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, break; case PREALLOC_MODE_METADATA: - ret = preallocate(bs, old_length, offset); + ret = preallocate_co(bs, old_length, offset); if (ret < 0) { error_setg_errno(errp, -ret, "Preallocation failed"); goto fail; From 3d9f2d2af63fda5f0404fb85ea80161837a4e4e3 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 26 Jun 2018 13:55:20 +0200 Subject: [PATCH 09/29] block: Move bdrv_truncate() implementation to io.c This moves the bdrv_truncate() implementation from block.c to block/io.c so it can have access to the tracked requests infrastructure. This involves making refresh_total_sectors() public (in block_int.h). Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- block.c | 111 +------------------------------------- block/io.c | 109 +++++++++++++++++++++++++++++++++++++ include/block/block_int.h | 2 + 3 files changed, 112 insertions(+), 110 deletions(-) diff --git a/block.c b/block.c index f761bc85d5..70a46fdd84 100644 --- a/block.c +++ b/block.c @@ -725,7 +725,7 @@ static int find_image_format(BlockBackend *file, const char *filename, * Set the current 'total_sectors' value * Return 0 on success, -errno on error. */ -static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) +int refresh_total_sectors(BlockDriverState *bs, int64_t hint) { BlockDriver *drv = bs->drv; @@ -2226,16 +2226,6 @@ static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) } } -static void bdrv_parent_cb_resize(BlockDriverState *bs) -{ - BdrvChild *c; - QLIST_FOREACH(c, &bs->parents, next_parent) { - if (c->role->resize) { - c->role->resize(c); - } - } -} - /* * Sets the backing file link of a BDS. A new reference is created; callers * which don't need their own reference any more must call bdrv_unref(). @@ -3785,105 +3775,6 @@ exit: return ret; } -/** - * Truncate file to 'offset' bytes (needed only for file protocols) - */ -int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, - PreallocMode prealloc, Error **errp) -{ - BlockDriverState *bs = child->bs; - BlockDriver *drv = bs->drv; - int ret; - - assert(child->perm & BLK_PERM_RESIZE); - - /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ - if (!drv) { - error_setg(errp, "No medium inserted"); - return -ENOMEDIUM; - } - if (offset < 0) { - error_setg(errp, "Image size cannot be negative"); - return -EINVAL; - } - - bdrv_inc_in_flight(bs); - - if (!drv->bdrv_co_truncate) { - if (bs->file && drv->is_filter) { - ret = bdrv_co_truncate(bs->file, offset, prealloc, errp); - goto out; - } - error_setg(errp, "Image format driver does not support resize"); - ret = -ENOTSUP; - goto out; - } - if (bs->read_only) { - error_setg(errp, "Image is read-only"); - ret = -EACCES; - goto out; - } - - assert(!(bs->open_flags & BDRV_O_INACTIVE)); - - ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp); - if (ret < 0) { - goto out; - } - ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); - if (ret < 0) { - error_setg_errno(errp, -ret, "Could not refresh total sector count"); - } else { - offset = bs->total_sectors * BDRV_SECTOR_SIZE; - } - bdrv_dirty_bitmap_truncate(bs, offset); - bdrv_parent_cb_resize(bs); - atomic_inc(&bs->write_gen); - -out: - bdrv_dec_in_flight(bs); - return ret; -} - -typedef struct TruncateCo { - BdrvChild *child; - int64_t offset; - PreallocMode prealloc; - Error **errp; - int ret; -} TruncateCo; - -static void coroutine_fn bdrv_truncate_co_entry(void *opaque) -{ - TruncateCo *tco = opaque; - tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc, - tco->errp); -} - -int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, - Error **errp) -{ - Coroutine *co; - TruncateCo tco = { - .child = child, - .offset = offset, - .prealloc = prealloc, - .errp = errp, - .ret = NOT_DONE, - }; - - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_truncate_co_entry(&tco); - } else { - co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco); - qemu_coroutine_enter(co); - BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE); - } - - return tco.ret; -} - /** * Length of a allocated file in bytes. Sparse files are counted by actual * allocated space. Return < 0 if error or unknown. diff --git a/block/io.c b/block/io.c index ef4fedd364..7e87a42b8e 100644 --- a/block/io.c +++ b/block/io.c @@ -3020,3 +3020,112 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, bdrv_dec_in_flight(dst_bs); return ret; } + +static void bdrv_parent_cb_resize(BlockDriverState *bs) +{ + BdrvChild *c; + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c->role->resize) { + c->role->resize(c); + } + } +} + +/** + * Truncate file to 'offset' bytes (needed only for file protocols) + */ +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, + PreallocMode prealloc, Error **errp) +{ + BlockDriverState *bs = child->bs; + BlockDriver *drv = bs->drv; + int ret; + + assert(child->perm & BLK_PERM_RESIZE); + + /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ + if (!drv) { + error_setg(errp, "No medium inserted"); + return -ENOMEDIUM; + } + if (offset < 0) { + error_setg(errp, "Image size cannot be negative"); + return -EINVAL; + } + + bdrv_inc_in_flight(bs); + + if (!drv->bdrv_co_truncate) { + if (bs->file && drv->is_filter) { + ret = bdrv_co_truncate(bs->file, offset, prealloc, errp); + goto out; + } + error_setg(errp, "Image format driver does not support resize"); + ret = -ENOTSUP; + goto out; + } + if (bs->read_only) { + error_setg(errp, "Image is read-only"); + ret = -EACCES; + goto out; + } + + assert(!(bs->open_flags & BDRV_O_INACTIVE)); + + ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp); + if (ret < 0) { + goto out; + } + ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not refresh total sector count"); + } else { + offset = bs->total_sectors * BDRV_SECTOR_SIZE; + } + bdrv_dirty_bitmap_truncate(bs, offset); + bdrv_parent_cb_resize(bs); + atomic_inc(&bs->write_gen); + +out: + bdrv_dec_in_flight(bs); + return ret; +} + +typedef struct TruncateCo { + BdrvChild *child; + int64_t offset; + PreallocMode prealloc; + Error **errp; + int ret; +} TruncateCo; + +static void coroutine_fn bdrv_truncate_co_entry(void *opaque) +{ + TruncateCo *tco = opaque; + tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc, + tco->errp); +} + +int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, + Error **errp) +{ + Coroutine *co; + TruncateCo tco = { + .child = child, + .offset = offset, + .prealloc = prealloc, + .errp = errp, + .ret = NOT_DONE, + }; + + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_truncate_co_entry(&tco); + } else { + co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco); + qemu_coroutine_enter(co); + BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE); + } + + return tco.ret; +} diff --git a/include/block/block_int.h b/include/block/block_int.h index c653ee663a..740166a996 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -1157,4 +1157,6 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, BdrvChild *dst, uint64_t dst_offset, uint64_t bytes, BdrvRequestFlags flags); +int refresh_total_sectors(BlockDriverState *bs, int64_t hint); + #endif /* BLOCK_INT_H */ From 1bc5f09f2e1b2be8f6f737b8d5352b438fc41492 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 26 Jun 2018 14:23:23 +0200 Subject: [PATCH 10/29] block: Use tracked request for truncate When growing an image, block drivers (especially protocol drivers) may initialise the newly added area. I/O requests to the same area need to wait for this initialisation to be completed so that data writes don't get overwritten and reads don't read uninitialised data. To avoid overhead in the fast I/O path by adding new locking in the protocol drivers and to restrict the impact to requests that actually touch the new area, reuse the existing tracked request infrastructure in block/io.c and mark all discard requests as serialising. With this change, it is safe for protocol drivers to make .bdrv_co_truncate actually asynchronous. Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- block/io.c | 25 +++++++++++++++++++++++++ include/block/block_int.h | 1 + 2 files changed, 26 insertions(+) diff --git a/block/io.c b/block/io.c index 7e87a42b8e..01a3c4eac5 100644 --- a/block/io.c +++ b/block/io.c @@ -3039,6 +3039,8 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, { BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; + BdrvTrackedRequest req; + int64_t old_size, new_bytes; int ret; assert(child->perm & BLK_PERM_RESIZE); @@ -3053,7 +3055,28 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, return -EINVAL; } + old_size = bdrv_getlength(bs); + if (old_size < 0) { + error_setg_errno(errp, -old_size, "Failed to get old image size"); + return old_size; + } + + if (offset > old_size) { + new_bytes = offset - old_size; + } else { + new_bytes = 0; + } + bdrv_inc_in_flight(bs); + tracked_request_begin(&req, bs, offset, new_bytes, BDRV_TRACKED_TRUNCATE); + + /* If we are growing the image and potentially using preallocation for the + * new area, we need to make sure that no write requests are made to it + * concurrently or they might be overwritten by preallocation. */ + if (new_bytes) { + mark_request_serialising(&req, 1); + wait_serialising_requests(&req); + } if (!drv->bdrv_co_truncate) { if (bs->file && drv->is_filter) { @@ -3087,7 +3110,9 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, atomic_inc(&bs->write_gen); out: + tracked_request_end(&req); bdrv_dec_in_flight(bs); + return ret; } diff --git a/include/block/block_int.h b/include/block/block_int.h index 740166a996..af71b414be 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -63,6 +63,7 @@ enum BdrvTrackedRequestType { BDRV_TRACKED_READ, BDRV_TRACKED_WRITE, BDRV_TRACKED_DISCARD, + BDRV_TRACKED_TRUNCATE, }; typedef struct BdrvTrackedRequest { From 93f4e2ff4b31205d8bab0856631a52ed442b8b1c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 21 Jun 2018 18:23:16 +0200 Subject: [PATCH 11/29] file-posix: Make .bdrv_co_truncate asynchronous This moves the code to resize an image file to the thread pool to avoid blocking. Creating large images with preallocation with blockdev-create is now actually a background job instead of blocking the monitor (and most other things) until the preallocation has completed. Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- block/file-posix.c | 266 +++++++++++++++++++++++----------------- include/block/raw-aio.h | 4 +- 2 files changed, 154 insertions(+), 116 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 53c5833659..639265d53b 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -188,8 +188,16 @@ typedef struct RawPosixAIOData { #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ off_t aio_offset; int aio_type; - int aio_fd2; - off_t aio_offset2; + union { + struct { + int aio_fd2; + off_t aio_offset2; + }; + struct { + PreallocMode prealloc; + Error **errp; + }; + }; } RawPosixAIOData; #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) @@ -1539,6 +1547,122 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) return ret; } +static int handle_aiocb_truncate(RawPosixAIOData *aiocb) +{ + int result = 0; + int64_t current_length = 0; + char *buf = NULL; + struct stat st; + int fd = aiocb->aio_fildes; + int64_t offset = aiocb->aio_offset; + Error **errp = aiocb->errp; + + if (fstat(fd, &st) < 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not stat file"); + return result; + } + + current_length = st.st_size; + if (current_length > offset && aiocb->prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Cannot use preallocation for shrinking files"); + return -ENOTSUP; + } + + switch (aiocb->prealloc) { +#ifdef CONFIG_POSIX_FALLOCATE + case PREALLOC_MODE_FALLOC: + /* + * Truncating before posix_fallocate() makes it about twice slower on + * file systems that do not support fallocate(), trying to check if a + * block is allocated before allocating it, so don't do that here. + */ + if (offset != current_length) { + result = -posix_fallocate(fd, current_length, + offset - current_length); + if (result != 0) { + /* posix_fallocate() doesn't set errno. */ + error_setg_errno(errp, -result, + "Could not preallocate new data"); + } + } else { + result = 0; + } + goto out; +#endif + case PREALLOC_MODE_FULL: + { + int64_t num = 0, left = offset - current_length; + off_t seek_result; + + /* + * Knowing the final size from the beginning could allow the file + * system driver to do less allocations and possibly avoid + * fragmentation of the file. + */ + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); + goto out; + } + + buf = g_malloc0(65536); + + seek_result = lseek(fd, current_length, SEEK_SET); + if (seek_result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Failed to seek to the old end of file"); + goto out; + } + + while (left > 0) { + num = MIN(left, 65536); + result = write(fd, buf, num); + if (result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Could not write zeros for preallocation"); + goto out; + } + left -= result; + } + if (result >= 0) { + result = fsync(fd); + if (result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Could not flush file to disk"); + goto out; + } + } + goto out; + } + case PREALLOC_MODE_OFF: + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); + } + return result; + default: + result = -ENOTSUP; + error_setg(errp, "Unsupported preallocation mode: %s", + PreallocMode_str(aiocb->prealloc)); + return result; + } + +out: + if (result < 0) { + if (ftruncate(fd, current_length) < 0) { + error_report("Failed to restore old file length: %s", + strerror(errno)); + } + } + + g_free(buf); + return result; +} + static int aio_worker(void *arg) { RawPosixAIOData *aiocb = arg; @@ -1582,6 +1706,9 @@ static int aio_worker(void *arg) case QEMU_AIO_COPY_RANGE: ret = handle_aiocb_copy_range(aiocb); break; + case QEMU_AIO_TRUNCATE: + ret = handle_aiocb_truncate(aiocb); + break; default: fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); ret = -EINVAL; @@ -1765,117 +1892,25 @@ static void raw_close(BlockDriverState *bs) * * Returns: 0 on success, -errno on failure. */ -static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc, - Error **errp) +static int coroutine_fn +raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, + PreallocMode prealloc, Error **errp) { - int result = 0; - int64_t current_length = 0; - char *buf = NULL; - struct stat st; + RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); + ThreadPool *pool; - if (fstat(fd, &st) < 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not stat file"); - return result; - } + *acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = fd, + .aio_type = QEMU_AIO_TRUNCATE, + .aio_offset = offset, + .prealloc = prealloc, + .errp = errp, + }; - current_length = st.st_size; - if (current_length > offset && prealloc != PREALLOC_MODE_OFF) { - error_setg(errp, "Cannot use preallocation for shrinking files"); - return -ENOTSUP; - } - - switch (prealloc) { -#ifdef CONFIG_POSIX_FALLOCATE - case PREALLOC_MODE_FALLOC: - /* - * Truncating before posix_fallocate() makes it about twice slower on - * file systems that do not support fallocate(), trying to check if a - * block is allocated before allocating it, so don't do that here. - */ - if (offset != current_length) { - result = -posix_fallocate(fd, current_length, offset - current_length); - if (result != 0) { - /* posix_fallocate() doesn't set errno. */ - error_setg_errno(errp, -result, - "Could not preallocate new data"); - } - } else { - result = 0; - } - goto out; -#endif - case PREALLOC_MODE_FULL: - { - int64_t num = 0, left = offset - current_length; - off_t seek_result; - - /* - * Knowing the final size from the beginning could allow the file - * system driver to do less allocations and possibly avoid - * fragmentation of the file. - */ - if (ftruncate(fd, offset) != 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not resize file"); - goto out; - } - - buf = g_malloc0(65536); - - seek_result = lseek(fd, current_length, SEEK_SET); - if (seek_result < 0) { - result = -errno; - error_setg_errno(errp, -result, - "Failed to seek to the old end of file"); - goto out; - } - - while (left > 0) { - num = MIN(left, 65536); - result = write(fd, buf, num); - if (result < 0) { - result = -errno; - error_setg_errno(errp, -result, - "Could not write zeros for preallocation"); - goto out; - } - left -= result; - } - if (result >= 0) { - result = fsync(fd); - if (result < 0) { - result = -errno; - error_setg_errno(errp, -result, - "Could not flush file to disk"); - goto out; - } - } - goto out; - } - case PREALLOC_MODE_OFF: - if (ftruncate(fd, offset) != 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not resize file"); - } - return result; - default: - result = -ENOTSUP; - error_setg(errp, "Unsupported preallocation mode: %s", - PreallocMode_str(prealloc)); - return result; - } - -out: - if (result < 0) { - if (ftruncate(fd, current_length) < 0) { - error_report("Failed to restore old file length: %s", - strerror(errno)); - } - } - - g_free(buf); - return result; + /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */ + pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + return thread_pool_submit_co(pool, aio_worker, acb); } static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, @@ -1892,7 +1927,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, } if (S_ISREG(st.st_mode)) { - return raw_regular_truncate(s->fd, offset, prealloc, errp); + return raw_regular_truncate(bs, s->fd, offset, prealloc, errp); } if (prealloc != PREALLOC_MODE_OFF) { @@ -2094,7 +2129,8 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs) return (int64_t)st.st_blocks * 512; } -static int raw_co_create(BlockdevCreateOptions *options, Error **errp) +static int coroutine_fn +raw_co_create(BlockdevCreateOptions *options, Error **errp) { BlockdevCreateOptionsFile *file_opts; int fd; @@ -2146,7 +2182,7 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp) } /* Clear the file by truncating it to 0 */ - result = raw_regular_truncate(fd, 0, PREALLOC_MODE_OFF, errp); + result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp); if (result < 0) { goto out_close; } @@ -2168,8 +2204,8 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp) /* Resize and potentially preallocate the file to the desired * final size */ - result = raw_regular_truncate(fd, file_opts->size, file_opts->preallocation, - errp); + result = raw_regular_truncate(NULL, fd, file_opts->size, + file_opts->preallocation, errp); if (result < 0) { goto out_close; } diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 8d698ccd31..6799614e56 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -26,6 +26,7 @@ #define QEMU_AIO_DISCARD 0x0010 #define QEMU_AIO_WRITE_ZEROES 0x0020 #define QEMU_AIO_COPY_RANGE 0x0040 +#define QEMU_AIO_TRUNCATE 0x0080 #define QEMU_AIO_TYPE_MASK \ (QEMU_AIO_READ | \ QEMU_AIO_WRITE | \ @@ -33,7 +34,8 @@ QEMU_AIO_FLUSH | \ QEMU_AIO_DISCARD | \ QEMU_AIO_WRITE_ZEROES | \ - QEMU_AIO_COPY_RANGE) + QEMU_AIO_COPY_RANGE | \ + QEMU_AIO_TRUNCATE) /* AIO flags */ #define QEMU_AIO_MISALIGNED 0x1000 From 354d930dc6e90e97599459b79c071ff1b93e433b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 27 Jun 2018 11:57:51 +0800 Subject: [PATCH 12/29] qcow2: Remove dead check on !ret MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beginning of the function, we initialize the local variable to 0, and in the body of the function, we check the assigned values and exit the loop immediately. So here it can never be non-zero. Reported-by: Kevin Wolf Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Kevin Wolf --- block/qcow2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index 4a0d92860d..2d190aa00b 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1772,7 +1772,7 @@ static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, while (l2meta != NULL) { QCowL2Meta *next; - if (!ret && link_l2) { + if (link_l2) { ret = qcow2_alloc_cluster_link_l2(bs, l2meta); if (ret) { goto out; From 37aec7d75eb0d035a0db4f2cf9ad8b1b0c10f91b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 27 Jun 2018 11:57:52 +0800 Subject: [PATCH 13/29] block: Move request tracking to children in copy offloading in_flight and tracked requests need to be tracked in every layer during recursion. For now the only user is qemu-img convert where overlapping requests and IOThreads don't exist, therefore this change doesn't make much difference form user point of view, but it is incorrect as part of the API. Fix it. Reported-by: Kevin Wolf Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/io.c | 59 ++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/block/io.c b/block/io.c index 01a3c4eac5..b63822280a 100644 --- a/block/io.c +++ b/block/io.c @@ -2932,6 +2932,9 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, BdrvRequestFlags flags, bool recurse_src) { + BdrvTrackedRequest src_req, dst_req; + BlockDriverState *src_bs = src->bs; + BlockDriverState *dst_bs = dst->bs; int ret; if (!src || !dst || !src->bs || !dst->bs) { @@ -2955,17 +2958,31 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, || src->bs->encrypted || dst->bs->encrypted) { return -ENOTSUP; } + bdrv_inc_in_flight(src_bs); + bdrv_inc_in_flight(dst_bs); + tracked_request_begin(&src_req, src_bs, src_offset, + bytes, BDRV_TRACKED_READ); + tracked_request_begin(&dst_req, dst_bs, dst_offset, + bytes, BDRV_TRACKED_WRITE); + + wait_serialising_requests(&src_req); + wait_serialising_requests(&dst_req); if (recurse_src) { - return src->bs->drv->bdrv_co_copy_range_from(src->bs, - src, src_offset, - dst, dst_offset, - bytes, flags); + ret = src->bs->drv->bdrv_co_copy_range_from(src->bs, + src, src_offset, + dst, dst_offset, + bytes, flags); } else { - return dst->bs->drv->bdrv_co_copy_range_to(dst->bs, - src, src_offset, - dst, dst_offset, - bytes, flags); + ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs, + src, src_offset, + dst, dst_offset, + bytes, flags); } + tracked_request_end(&src_req); + tracked_request_end(&dst_req); + bdrv_dec_in_flight(src_bs); + bdrv_dec_in_flight(dst_bs); + return ret; } /* Copy range from @src to @dst. @@ -2996,29 +3013,9 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, BdrvChild *dst, uint64_t dst_offset, uint64_t bytes, BdrvRequestFlags flags) { - BdrvTrackedRequest src_req, dst_req; - BlockDriverState *src_bs = src->bs; - BlockDriverState *dst_bs = dst->bs; - int ret; - - bdrv_inc_in_flight(src_bs); - bdrv_inc_in_flight(dst_bs); - tracked_request_begin(&src_req, src_bs, src_offset, - bytes, BDRV_TRACKED_READ); - tracked_request_begin(&dst_req, dst_bs, dst_offset, - bytes, BDRV_TRACKED_WRITE); - - wait_serialising_requests(&src_req); - wait_serialising_requests(&dst_req); - ret = bdrv_co_copy_range_from(src, src_offset, - dst, dst_offset, - bytes, flags); - - tracked_request_end(&src_req); - tracked_request_end(&dst_req); - bdrv_dec_in_flight(src_bs); - bdrv_dec_in_flight(dst_bs); - return ret; + return bdrv_co_copy_range_from(src, src_offset, + dst, dst_offset, + bytes, flags); } static void bdrv_parent_cb_resize(BlockDriverState *bs) From 796d32394516d624986e9275f56bfcbffe945491 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 26 Jun 2018 19:41:19 +0200 Subject: [PATCH 14/29] block/crypto: Simplify block_crypto_{open,create}_opts_init() block_crypto_open_opts_init() and block_crypto_create_opts_init() contain a virtual visit of QCryptoBlockOptions and QCryptoBlockCreateOptions less member "format", respectively. Change their callers to put member "format" in the QDict, so they can use the generated visitors for these types instead. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/crypto.c | 99 +++++++------------------------------------------- block/crypto.h | 8 +--- block/qcow.c | 5 +-- block/qcow2.c | 10 ++--- 4 files changed, 22 insertions(+), 100 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index 210c80d5c7..994172a3de 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -148,108 +148,36 @@ static QemuOptsList block_crypto_create_opts_luks = { QCryptoBlockOpenOptions * -block_crypto_open_opts_init(QCryptoBlockFormat format, - QDict *opts, - Error **errp) +block_crypto_open_opts_init(QDict *opts, Error **errp) { Visitor *v; - QCryptoBlockOpenOptions *ret = NULL; - Error *local_err = NULL; + QCryptoBlockOpenOptions *ret; - ret = g_new0(QCryptoBlockOpenOptions, 1); - ret->format = format; - - v = qobject_input_visitor_new_flat_confused(opts, &local_err); + v = qobject_input_visitor_new_flat_confused(opts, errp); if (!v) { - goto out; + return NULL; } - visit_start_struct(v, NULL, NULL, 0, &local_err); - if (local_err) { - goto out; - } + visit_type_QCryptoBlockOpenOptions(v, NULL, &ret, errp); - switch (format) { - case Q_CRYPTO_BLOCK_FORMAT_LUKS: - visit_type_QCryptoBlockOptionsLUKS_members( - v, &ret->u.luks, &local_err); - break; - - case Q_CRYPTO_BLOCK_FORMAT_QCOW: - visit_type_QCryptoBlockOptionsQCow_members( - v, &ret->u.qcow, &local_err); - break; - - default: - error_setg(&local_err, "Unsupported block format %d", format); - break; - } - if (!local_err) { - visit_check_struct(v, &local_err); - } - - visit_end_struct(v, NULL); - - out: - if (local_err) { - error_propagate(errp, local_err); - qapi_free_QCryptoBlockOpenOptions(ret); - ret = NULL; - } visit_free(v); return ret; } QCryptoBlockCreateOptions * -block_crypto_create_opts_init(QCryptoBlockFormat format, - QDict *opts, - Error **errp) +block_crypto_create_opts_init(QDict *opts, Error **errp) { Visitor *v; - QCryptoBlockCreateOptions *ret = NULL; - Error *local_err = NULL; + QCryptoBlockCreateOptions *ret; - ret = g_new0(QCryptoBlockCreateOptions, 1); - ret->format = format; - - v = qobject_input_visitor_new_flat_confused(opts, &local_err); + v = qobject_input_visitor_new_flat_confused(opts, errp); if (!v) { - goto out; + return NULL; } - visit_start_struct(v, NULL, NULL, 0, &local_err); - if (local_err) { - goto out; - } + visit_type_QCryptoBlockCreateOptions(v, NULL, &ret, errp); - switch (format) { - case Q_CRYPTO_BLOCK_FORMAT_LUKS: - visit_type_QCryptoBlockCreateOptionsLUKS_members( - v, &ret->u.luks, &local_err); - break; - - case Q_CRYPTO_BLOCK_FORMAT_QCOW: - visit_type_QCryptoBlockOptionsQCow_members( - v, &ret->u.qcow, &local_err); - break; - - default: - error_setg(&local_err, "Unsupported block format %d", format); - break; - } - if (!local_err) { - visit_check_struct(v, &local_err); - } - - visit_end_struct(v, NULL); - - out: - if (local_err) { - error_propagate(errp, local_err); - qapi_free_QCryptoBlockCreateOptions(ret); - ret = NULL; - } visit_free(v); return ret; } @@ -287,8 +215,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, } cryptoopts = qemu_opts_to_qdict(opts, NULL); + qdict_put_str(cryptoopts, "format", QCryptoBlockFormat_str(format)); - open_opts = block_crypto_open_opts_init(format, cryptoopts, errp); + open_opts = block_crypto_open_opts_init(cryptoopts, errp); if (!open_opts) { goto cleanup; } @@ -612,8 +541,8 @@ static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, &block_crypto_create_opts_luks, true); - create_opts = block_crypto_create_opts_init(Q_CRYPTO_BLOCK_FORMAT_LUKS, - cryptoopts, errp); + qdict_put_str(cryptoopts, "format", "luks"); + create_opts = block_crypto_create_opts_init(cryptoopts, errp); if (!create_opts) { ret = -EINVAL; goto fail; diff --git a/block/crypto.h b/block/crypto.h index 0f985ea4e2..dd7d47903c 100644 --- a/block/crypto.h +++ b/block/crypto.h @@ -89,13 +89,9 @@ } QCryptoBlockCreateOptions * -block_crypto_create_opts_init(QCryptoBlockFormat format, - QDict *opts, - Error **errp); +block_crypto_create_opts_init(QDict *opts, Error **errp); QCryptoBlockOpenOptions * -block_crypto_open_opts_init(QCryptoBlockFormat format, - QDict *opts, - Error **errp); +block_crypto_open_opts_init(QDict *opts, Error **errp); #endif /* BLOCK_CRYPTO_H__ */ diff --git a/block/qcow.c b/block/qcow.c index 5532731b9f..8546fe5bb7 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -203,9 +203,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, ret = -EINVAL; goto fail; } - qdict_del(encryptopts, "format"); - crypto_opts = block_crypto_open_opts_init( - Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); + qdict_put_str(encryptopts, "format", "qcow"); + crypto_opts = block_crypto_open_opts_init(encryptopts, errp); if (!crypto_opts) { ret = -EINVAL; goto fail; diff --git a/block/qcow2.c b/block/qcow2.c index 2d190aa00b..78aeb1f9ea 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1040,9 +1040,8 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, ret = -EINVAL; goto fail; } - qdict_del(encryptopts, "format"); - r->crypto_opts = block_crypto_open_opts_init( - Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); + qdict_put_str(encryptopts, "format", "qcow"); + r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); break; case QCOW_CRYPT_LUKS: @@ -1053,9 +1052,8 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, ret = -EINVAL; goto fail; } - qdict_del(encryptopts, "format"); - r->crypto_opts = block_crypto_open_opts_init( - Q_CRYPTO_BLOCK_FORMAT_LUKS, encryptopts, errp); + qdict_put_str(encryptopts, "format", "luks"); + r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); break; default: From 93a3642efcadf4ad6045ccea38a05ff5297dfe26 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 28 Jun 2018 17:36:14 +0200 Subject: [PATCH 15/29] qemu-iotests: Update 026.out.nocache reference output Commit abf754fe406 updated 026.out, but forgot to also update 026.out.nocache. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- tests/qemu-iotests/026.out.nocache | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache index ea2e166a4e..650ccd8b01 100644 --- a/tests/qemu-iotests/026.out.nocache +++ b/tests/qemu-iotests/026.out.nocache @@ -541,7 +541,7 @@ Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device -11 leaked clusters were found on the image. +10 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 @@ -569,7 +569,7 @@ Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device -11 leaked clusters were found on the image. +10 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 @@ -597,7 +597,7 @@ Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device -11 leaked clusters were found on the image. +10 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 From 8b24cd141549b5b264baeddd4e72902cfb5de23b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 28 Jun 2018 17:05:45 +0200 Subject: [PATCH 16/29] qcow2: Free allocated clusters on write error If we managed to allocate the clusters, but then failed to write the data, there's a good chance that we'll still be able to free the clusters again in order to avoid cluster leaks (the refcounts are cached, so even if we can't write them out right now, we may be able to do so when the VM is resumed after a werror=stop/enospc pause). Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Reviewed-by: Eric Blake Tested-by: Eric Blake --- block/qcow2-cluster.c | 11 +++++++++++ block/qcow2.c | 2 ++ block/qcow2.h | 1 + 3 files changed, 14 insertions(+) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 0d74584c9b..d37fe08b3d 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -994,6 +994,17 @@ err: return ret; } +/** + * Frees the allocated clusters because the request failed and they won't + * actually be linked. + */ +void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) +{ + BDRVQcow2State *s = bs->opaque; + qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, + QCOW2_DISCARD_NEVER); +} + /* * Returns the number of contiguous clusters that can be used for an allocating * write, but require COW to be performed (this includes yet unallocated space, diff --git a/block/qcow2.c b/block/qcow2.c index 78aeb1f9ea..286640c0e3 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1775,6 +1775,8 @@ static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, if (ret) { goto out; } + } else { + qcow2_alloc_cluster_abort(bs, l2meta); } /* Take the request off the list of running requests */ diff --git a/block/qcow2.h b/block/qcow2.h index 01b5250415..1c9c0d3631 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -614,6 +614,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, int compressed_size); int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); +void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, uint64_t bytes, enum qcow2_discard_type type, bool full_discard); From ae376c6255d0eee4b3c4d60acc4679aa99c0d2c8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 28 Jun 2018 17:18:51 +0200 Subject: [PATCH 17/29] qemu-iotests: Test qcow2 not leaking clusters on write error This adds a test for a temporary write failure, which simulates the situation after werror=stop/enospc has stopped the VM. We shouldn't leave leaked clusters behind in such cases. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- tests/qemu-iotests/026 | 17 +++++++++++++++++ tests/qemu-iotests/026.out | 8 ++++++++ tests/qemu-iotests/026.out.nocache | 8 ++++++++ 3 files changed, 33 insertions(+) diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 index 7fadfbace5..582d254195 100755 --- a/tests/qemu-iotests/026 +++ b/tests/qemu-iotests/026 @@ -200,6 +200,23 @@ done done done +echo +echo === Avoid cluster leaks after temporary failure === +echo + +cat > "$TEST_DIR/blkdebug.conf" < Date: Thu, 21 Jun 2018 19:07:32 +0200 Subject: [PATCH 18/29] file-posix: Implement co versions of discard/flush This simplifies file-posix by implementing the coroutine variants of the discard and flush BlockDriver callbacks. These were the last remaining users of paio_submit(), which can be removed now. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/file-posix.c | 72 ++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 48 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 639265d53b..e3f1b045d1 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1754,31 +1754,6 @@ static inline int paio_submit_co(BlockDriverState *bs, int fd, return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type); } -static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, - int64_t offset, QEMUIOVector *qiov, int bytes, - BlockCompletionFunc *cb, void *opaque, int type) -{ - RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); - ThreadPool *pool; - - acb->bs = bs; - acb->aio_type = type; - acb->aio_fildes = fd; - - acb->aio_nbytes = bytes; - acb->aio_offset = offset; - - if (qiov) { - acb->aio_iov = qiov->iov; - acb->aio_niov = qiov->niov; - assert(qiov->size == acb->aio_nbytes); - } - - trace_paio_submit(acb, opaque, offset, bytes, type); - pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); - return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); -} - static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int type) { @@ -1845,15 +1820,17 @@ static void raw_aio_unplug(BlockDriverState *bs) #endif } -static BlockAIOCB *raw_aio_flush(BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque) +static int raw_co_flush_to_disk(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; + int ret; - if (fd_open(bs) < 0) - return NULL; + ret = fd_open(bs); + if (ret < 0) { + return ret; + } - return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH); + return paio_submit_co(bs, s->fd, 0, NULL, 0, QEMU_AIO_FLUSH); } static void raw_aio_attach_aio_context(BlockDriverState *bs, @@ -2526,14 +2503,12 @@ static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs, #endif /* !__linux__ */ } -static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque) +static coroutine_fn int +raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { BDRVRawState *s = bs->opaque; - return paio_submit(bs, s->fd, offset, NULL, bytes, - cb, opaque, QEMU_AIO_DISCARD); + return paio_submit_co(bs, s->fd, offset, NULL, bytes, QEMU_AIO_DISCARD); } static int coroutine_fn raw_co_pwrite_zeroes( @@ -2652,8 +2627,8 @@ BlockDriver bdrv_file = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_aio_flush = raw_aio_flush, - .bdrv_aio_pdiscard = raw_aio_pdiscard, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_co_pdiscard = raw_co_pdiscard, .bdrv_co_copy_range_from = raw_co_copy_range_from, .bdrv_co_copy_range_to = raw_co_copy_range_to, .bdrv_refresh_limits = raw_refresh_limits, @@ -3019,17 +2994,18 @@ static int fd_open(BlockDriverState *bs) return -EIO; } -static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque) +static coroutine_fn int +hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { BDRVRawState *s = bs->opaque; + int ret; - if (fd_open(bs) < 0) { - return NULL; + ret = fd_open(bs); + if (ret < 0) { + return ret; } - return paio_submit(bs, s->fd, offset, NULL, bytes, - cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); + return paio_submit_co(bs, s->fd, offset, NULL, bytes, + QEMU_AIO_DISCARD | QEMU_AIO_BLKDEV); } static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, @@ -3133,8 +3109,8 @@ static BlockDriver bdrv_host_device = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_aio_flush = raw_aio_flush, - .bdrv_aio_pdiscard = hdev_aio_pdiscard, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_co_pdiscard = hdev_co_pdiscard, .bdrv_co_copy_range_from = raw_co_copy_range_from, .bdrv_co_copy_range_to = raw_co_copy_range_to, .bdrv_refresh_limits = raw_refresh_limits, @@ -3258,7 +3234,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_aio_flush = raw_aio_flush, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, @@ -3388,7 +3364,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_aio_flush = raw_aio_flush, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, From e06f4639d8a93703eecc3aad06c8a3e9b2ef4968 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Fri, 29 Jun 2018 14:03:26 +0800 Subject: [PATCH 19/29] qcow2: Fix src_offset in copy offloading Not updating src_offset will result in wrong data being written to dst image. Reported-by: Max Reitz Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2.c | 1 + tests/qemu-iotests/063 | 9 +++++++++ tests/qemu-iotests/063.out | 12 ++++++++++++ 3 files changed, 22 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c index 286640c0e3..2f9e58e0c4 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3387,6 +3387,7 @@ qcow2_co_copy_range_to(BlockDriverState *bs, } bytes -= cur_bytes; + src_offset += cur_bytes; dst_offset += cur_bytes; } ret = 0; diff --git a/tests/qemu-iotests/063 b/tests/qemu-iotests/063 index e4f6ea9385..adc037c1f5 100755 --- a/tests/qemu-iotests/063 +++ b/tests/qemu-iotests/063 @@ -91,6 +91,15 @@ if $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n "$TEST_IMG.orig" "$TEST_IMG" >/dev exit 1 fi +echo "== Regression testing for copy offloading bug ==" + +_make_test_img 1M +TEST_IMG="$TEST_IMG.target" _make_test_img 1M +$QEMU_IO -c 'write -P 1 0 512k' -c 'write -P 2 512k 512k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'write -P 4 512k 512k' -c 'write -P 3 0 512k' "$TEST_IMG.target" | _filter_qemu_io +$QEMU_IMG convert -n -O $IMGFMT "$TEST_IMG" "$TEST_IMG.target" +$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.target" + echo "*** done" rm -f $seq.full status=0 diff --git a/tests/qemu-iotests/063.out b/tests/qemu-iotests/063.out index de1c99afd8..7b691b2c9e 100644 --- a/tests/qemu-iotests/063.out +++ b/tests/qemu-iotests/063.out @@ -7,4 +7,16 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 No errors were found on the image. == Testing conversion to a smaller file fails == Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152 +== Regression testing for copy offloading bug == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +Formatting 'TEST_DIR/t.IMGFMT.target', fmt=IMGFMT size=1048576 +wrote 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Images are identical. *** done From 1439b9c11002348eb80fcd3c90f07bf0f4f088dc Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Fri, 29 Jun 2018 14:03:27 +0800 Subject: [PATCH 20/29] iscsi: Don't blindly use designator length in response for memcpy Per SCSI definition the designator_length we receive from INQUIRY is 8, 12 or at most 16, but we should be careful because the remote iscsi target may misbehave, otherwise we could have a buffer overflow. Reported-by: Max Reitz Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/iscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/iscsi.c b/block/iscsi.c index bc84b14e20..9beb06d498 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -2226,7 +2226,7 @@ static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun) desc[5] = (dd->designator_type & 0xF) | ((dd->association & 3) << 4); desc[7] = dd->designator_length; - memcpy(desc + 8, dd->designator, dd->designator_length); + memcpy(desc + 8, dd->designator, MIN(dd->designator_length, 20)); desc[28] = 0; desc[29] = (lun->block_size >> 16) & 0xFF; From c436e3d0145a3952aacf1a4014434b82d7157633 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Fri, 29 Jun 2018 14:03:28 +0800 Subject: [PATCH 21/29] file-posix: Fix EINTR handling EINTR should be checked against errno, not ret. While fixing the bug, collect the branches with a switch block. Also, change the return value from -ENOSTUP to -ENOSPC when the actual issue is request range passes EOF, which should be distinguishable from the case of error == ENOSYS by the caller, so that it could still retry with other byte ranges, whereas it shouldn't retry anymore upon ENOSYS. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/file-posix.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index e3f1b045d1..829ee538d8 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1488,20 +1488,21 @@ static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb) ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off, aiocb->aio_fd2, &out_off, bytes, 0); - if (ret == -EINTR) { - continue; + if (ret == 0) { + /* No progress (e.g. when beyond EOF), let the caller fall back to + * buffer I/O. */ + return -ENOSPC; } if (ret < 0) { - if (errno == ENOSYS) { + switch (errno) { + case ENOSYS: return -ENOTSUP; - } else { + case EINTR: + continue; + default: return -errno; } } - if (!ret) { - /* No progress (e.g. when beyond EOF), fall back to buffer I/O. */ - return -ENOTSUP; - } bytes -= ret; } return 0; From d08c2a245feb6ab82b5a044f72c75964eedeaef5 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 28 Jun 2018 15:15:18 -0500 Subject: [PATCH 22/29] parallels: Switch to byte-based calls We are gradually moving away from sector-based interfaces, towards byte-based. Make the change for the last few sector-based calls into the block layer from the parallels driver. Ideally, the parallels driver should switch to doing everything byte-based, but that's a more invasive change that requires a bit more auditing. Signed-off-by: Eric Blake Reviewed-by: Stefan Hajnoczi Reviewed-by: Denis V. Lunev Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- block/parallels.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index fd215e202a..cc9445879d 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -227,14 +227,15 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, }; qemu_iovec_init_external(&qiov, &iov, 1); - ret = bdrv_co_readv(bs->backing, idx * s->tracks, nb_cow_sectors, - &qiov); + ret = bdrv_co_preadv(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE, + nb_cow_bytes, &qiov, 0); if (ret < 0) { qemu_vfree(iov.iov_base); return ret; } - ret = bdrv_co_writev(bs->file, s->data_end, nb_cow_sectors, &qiov); + ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE, + nb_cow_bytes, &qiov, 0); qemu_vfree(iov.iov_base); if (ret < 0) { return ret; @@ -340,7 +341,8 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs, qemu_iovec_reset(&hd_qiov); qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); - ret = bdrv_co_writev(bs->file, position, n, &hd_qiov); + ret = bdrv_co_pwritev(bs->file, position * BDRV_SECTOR_SIZE, nbytes, + &hd_qiov, 0); if (ret < 0) { break; } @@ -379,7 +381,8 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, if (position < 0) { if (bs->backing) { - ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov); + ret = bdrv_co_preadv(bs->backing, sector_num * BDRV_SECTOR_SIZE, + nbytes, &hd_qiov, 0); if (ret < 0) { break; } @@ -387,7 +390,8 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, qemu_iovec_memset(&hd_qiov, 0, 0, nbytes); } } else { - ret = bdrv_co_readv(bs->file, position, n, &hd_qiov); + ret = bdrv_co_preadv(bs->file, position * BDRV_SECTOR_SIZE, nbytes, + &hd_qiov, 0); if (ret < 0) { break; } From 787993a5435289e90479f80f81681c804a9d22ce Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 28 Jun 2018 15:15:19 -0500 Subject: [PATCH 23/29] qcow: Switch get_cluster_offset to be byte-based We are gradually moving away from sector-based interfaces, towards byte-based. Make the change for the internal helper function get_cluster_offset(), by changing n_start and n_end to be byte offsets rather than sector indices within the cluster being allocated. However, assert that these values are still sector-aligned (at least qcrypto_block_encrypt() still wants that). For now we get that alignment for free because we still use sector-based driver callbacks. A later patch will then switch the qcow driver as a whole over to byte-based operation; but will still leave things at sector alignments as it is not worth auditing the qcow image format to worry about sub-sector requests. Signed-off-by: Eric Blake Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- block/qcow.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/block/qcow.c b/block/qcow.c index 8546fe5bb7..d7c385a91d 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -345,8 +345,8 @@ static int qcow_reopen_prepare(BDRVReopenState *state, * * 0 to not allocate. * - * 1 to allocate a normal cluster (for sector indexes 'n_start' to - * 'n_end') + * 1 to allocate a normal cluster (for sector-aligned byte offsets 'n_start' + * to 'n_end' within the cluster) * * 2 to allocate a compressed cluster of size * 'compressed_size'. 'compressed_size' must be > 0 and < @@ -440,9 +440,10 @@ static int get_cluster_offset(BlockDriverState *bs, if (!allocate) return 0; BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); + assert(QEMU_IS_ALIGNED(n_start | n_end, BDRV_SECTOR_SIZE)); /* allocate a new cluster */ if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && - (n_end - n_start) < s->cluster_sectors) { + (n_end - n_start) < s->cluster_size) { /* if the cluster is already compressed, we must decompress it in the case it is not completely overwritten */ @@ -480,16 +481,15 @@ static int get_cluster_offset(BlockDriverState *bs, /* if encrypted, we must initialize the cluster content which won't be written */ if (bs->encrypted && - (n_end - n_start) < s->cluster_sectors) { - uint64_t start_sect; + (n_end - n_start) < s->cluster_size) { + uint64_t start_offset; assert(s->crypto); - start_sect = (offset & ~(s->cluster_size - 1)) >> 9; - for(i = 0; i < s->cluster_sectors; i++) { + start_offset = offset & ~(s->cluster_size - 1); + for (i = 0; i < s->cluster_size; i += BDRV_SECTOR_SIZE) { if (i < n_start || i >= n_end) { - memset(s->cluster_data, 0x00, 512); + memset(s->cluster_data, 0x00, BDRV_SECTOR_SIZE); if (qcrypto_block_encrypt(s->crypto, - (start_sect + i) * - BDRV_SECTOR_SIZE, + start_offset + i, s->cluster_data, BDRV_SECTOR_SIZE, NULL) < 0) { @@ -497,8 +497,9 @@ static int get_cluster_offset(BlockDriverState *bs, } BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); ret = bdrv_pwrite(bs->file, - cluster_offset + i * 512, - s->cluster_data, 512); + cluster_offset + i, + s->cluster_data, + BDRV_SECTOR_SIZE); if (ret < 0) { return ret; } @@ -758,8 +759,8 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, n = nb_sectors; } ret = get_cluster_offset(bs, sector_num << 9, 1, 0, - index_in_cluster, - index_in_cluster + n, &cluster_offset); + index_in_cluster << 9, + (index_in_cluster + n) << 9, &cluster_offset); if (ret < 0) { break; } From a15312b0172292273303b3690f9eb69f3c01339c Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 28 Jun 2018 15:15:20 -0500 Subject: [PATCH 24/29] qcow: Switch qcow_co_readv to byte-based calls We are gradually moving away from sector-based interfaces, towards byte-based. Make the change for the internals of the qcow driver read function, by iterating over offset/bytes instead of sector_num/nb_sectors, and with a rename of index_in_cluster and repurposing of n to track bytes instead of sectors. A later patch will then switch the qcow driver as a whole over to byte-based operation. Signed-off-by: Eric Blake Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- block/qcow.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/block/qcow.c b/block/qcow.c index d7c385a91d..436ad518be 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -617,13 +617,15 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { BDRVQcowState *s = bs->opaque; - int index_in_cluster; + int offset_in_cluster; int ret = 0, n; uint64_t cluster_offset; struct iovec hd_iov; QEMUIOVector hd_qiov; uint8_t *buf; void *orig_buf; + int64_t offset = sector_num * BDRV_SECTOR_SIZE; + int64_t bytes = nb_sectors * BDRV_SECTOR_SIZE; if (qiov->niov > 1) { buf = orig_buf = qemu_try_blockalign(bs, qiov->size); @@ -637,36 +639,35 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, qemu_co_mutex_lock(&s->lock); - while (nb_sectors != 0) { + while (bytes != 0) { /* prepare next request */ - ret = get_cluster_offset(bs, sector_num << 9, - 0, 0, 0, 0, &cluster_offset); + ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset); if (ret < 0) { break; } - index_in_cluster = sector_num & (s->cluster_sectors - 1); - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) { - n = nb_sectors; + offset_in_cluster = offset & (s->cluster_size - 1); + n = s->cluster_size - offset_in_cluster; + if (n > bytes) { + n = bytes; } if (!cluster_offset) { if (bs->backing) { /* read from the base image */ hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n * 512; + hd_iov.iov_len = n; qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); qemu_co_mutex_unlock(&s->lock); /* qcow2 emits this on bs->file instead of bs->backing */ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); - ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov); + ret = bdrv_co_preadv(bs->backing, offset, n, &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { break; } } else { /* Note: in this case, no need to wait */ - memset(buf, 0, 512 * n); + memset(buf, 0, n); } } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { /* add AIO support for compressed blocks ? */ @@ -674,21 +675,19 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, ret = -EIO; break; } - memcpy(buf, - s->cluster_cache + index_in_cluster * 512, 512 * n); + memcpy(buf, s->cluster_cache + offset_in_cluster, n); } else { if ((cluster_offset & 511) != 0) { ret = -EIO; break; } hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n * 512; + hd_iov.iov_len = n; qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - ret = bdrv_co_readv(bs->file, - (cluster_offset >> 9) + index_in_cluster, - n, &hd_qiov); + ret = bdrv_co_preadv(bs->file, cluster_offset + offset_in_cluster, + n, &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { break; @@ -696,8 +695,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, if (bs->encrypted) { assert(s->crypto); if (qcrypto_block_decrypt(s->crypto, - sector_num * BDRV_SECTOR_SIZE, buf, - n * BDRV_SECTOR_SIZE, NULL) < 0) { + offset, buf, n, NULL) < 0) { ret = -EIO; break; } @@ -705,9 +703,9 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, } ret = 0; - nb_sectors -= n; - sector_num += n; - buf += n * 512; + bytes -= n; + offset += n; + buf += n; } qemu_co_mutex_unlock(&s->lock); From d1326a786d471840b9055af53a7aa57c2e6d858c Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 28 Jun 2018 15:15:21 -0500 Subject: [PATCH 25/29] qcow: Switch qcow_co_writev to byte-based calls We are gradually moving away from sector-based interfaces, towards byte-based. Make the change for the internals of the qcow driver write function, by iterating over offset/bytes instead of sector_num/nb_sectors, and with a rename of index_in_cluster and repurposing of n to track bytes instead of sectors. A later patch will then switch the qcow driver as a whole over to byte-based operation. Signed-off-by: Eric Blake Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- block/qcow.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/block/qcow.c b/block/qcow.c index 436ad518be..8fe82ed58d 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -723,13 +723,15 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, int flags) { BDRVQcowState *s = bs->opaque; - int index_in_cluster; + int offset_in_cluster; uint64_t cluster_offset; int ret = 0, n; struct iovec hd_iov; QEMUIOVector hd_qiov; uint8_t *buf; void *orig_buf; + int64_t offset = sector_num * BDRV_SECTOR_SIZE; + int64_t bytes = nb_sectors * BDRV_SECTOR_SIZE; assert(!flags); s->cluster_cache_offset = -1; /* disable compressed cache */ @@ -749,16 +751,14 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, qemu_co_mutex_lock(&s->lock); - while (nb_sectors != 0) { - - index_in_cluster = sector_num & (s->cluster_sectors - 1); - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) { - n = nb_sectors; + while (bytes != 0) { + offset_in_cluster = offset & (s->cluster_size - 1); + n = s->cluster_size - offset_in_cluster; + if (n > bytes) { + n = bytes; } - ret = get_cluster_offset(bs, sector_num << 9, 1, 0, - index_in_cluster << 9, - (index_in_cluster + n) << 9, &cluster_offset); + ret = get_cluster_offset(bs, offset, 1, 0, offset_in_cluster, + offset_in_cluster + n, &cluster_offset); if (ret < 0) { break; } @@ -768,30 +768,28 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, } if (bs->encrypted) { assert(s->crypto); - if (qcrypto_block_encrypt(s->crypto, sector_num * BDRV_SECTOR_SIZE, - buf, n * BDRV_SECTOR_SIZE, NULL) < 0) { + if (qcrypto_block_encrypt(s->crypto, offset, buf, n, NULL) < 0) { ret = -EIO; break; } } hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n * 512; + hd_iov.iov_len = n; qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - ret = bdrv_co_writev(bs->file, - (cluster_offset >> 9) + index_in_cluster, - n, &hd_qiov); + ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster, + n, &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { break; } ret = 0; - nb_sectors -= n; - sector_num += n; - buf += n * 512; + bytes -= n; + offset += n; + buf += n; } qemu_co_mutex_unlock(&s->lock); From 609841a3f8cb1761380032b7bbccb438abb8290f Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 28 Jun 2018 15:15:22 -0500 Subject: [PATCH 26/29] qcow: Switch to a byte-based driver We are gradually moving away from sector-based interfaces, towards byte-based. The qcow driver is now ready to fully utilize the byte-based callback interface, as long as we override the default alignment to still be 512 (needed at least for asserts present because of encryption, but easier to do everywhere than to audit which sub-sector requests are handled correctly, especially since we no longer recommend qcow for new disk images). Signed-off-by: Eric Blake Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- block/qcow.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/block/qcow.c b/block/qcow.c index 8fe82ed58d..102d058d1c 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -70,7 +70,6 @@ typedef struct QCowHeader { typedef struct BDRVQcowState { int cluster_bits; int cluster_size; - int cluster_sectors; int l2_bits; int l2_size; unsigned int l1_size; @@ -235,7 +234,6 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, } s->cluster_bits = header.cluster_bits; s->cluster_size = 1 << s->cluster_bits; - s->cluster_sectors = 1 << (s->cluster_bits - 9); s->l2_bits = header.l2_bits; s->l2_size = 1 << s->l2_bits; bs->total_sectors = header.size / 512; @@ -613,8 +611,18 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) return 0; } -static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) +static void qcow_refresh_limits(BlockDriverState *bs, Error **errp) +{ + /* At least encrypted images require 512-byte alignment. Apply the + * limit universally, rather than just on encrypted images, as + * it's easier to let the block layer handle rounding than to + * audit this code further. */ + bs->bl.request_alignment = BDRV_SECTOR_SIZE; +} + +static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) { BDRVQcowState *s = bs->opaque; int offset_in_cluster; @@ -624,9 +632,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, QEMUIOVector hd_qiov; uint8_t *buf; void *orig_buf; - int64_t offset = sector_num * BDRV_SECTOR_SIZE; - int64_t bytes = nb_sectors * BDRV_SECTOR_SIZE; + assert(!flags); if (qiov->niov > 1) { buf = orig_buf = qemu_try_blockalign(bs, qiov->size); if (buf == NULL) { @@ -718,9 +725,9 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, return ret; } -static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int flags) +static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) { BDRVQcowState *s = bs->opaque; int offset_in_cluster; @@ -730,8 +737,6 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector hd_qiov; uint8_t *buf; void *orig_buf; - int64_t offset = sector_num * BDRV_SECTOR_SIZE; - int64_t bytes = nb_sectors * BDRV_SECTOR_SIZE; assert(!flags); s->cluster_cache_offset = -1; /* disable compressed cache */ @@ -1104,8 +1109,7 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, if (ret != Z_STREAM_END || out_len >= s->cluster_size) { /* could not compress: write normal cluster */ - ret = qcow_co_writev(bs, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, qiov, 0); + ret = qcow_co_pwritev(bs, offset, bytes, qiov, 0); if (ret < 0) { goto fail; } @@ -1190,9 +1194,10 @@ static BlockDriver bdrv_qcow = { .bdrv_co_create_opts = qcow_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, .supports_backing = true, + .bdrv_refresh_limits = qcow_refresh_limits, - .bdrv_co_readv = qcow_co_readv, - .bdrv_co_writev = qcow_co_writev, + .bdrv_co_preadv = qcow_co_preadv, + .bdrv_co_pwritev = qcow_co_pwritev, .bdrv_co_block_status = qcow_co_block_status, .bdrv_make_empty = qcow_make_empty, From 04a11d87d1dfdc5f8081398694db8849d010c12d Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 28 Jun 2018 15:15:23 -0500 Subject: [PATCH 27/29] replication: Switch to byte-based calls We are gradually moving away from sector-based interfaces, towards byte-based. Make the change for the last few sector-based calls into the block layer from the replication driver. Ideally, the replication driver should switch to doing everything byte-based, but that's a more invasive change that requires a bit more auditing. Signed-off-by: Eric Blake Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- block/replication.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/block/replication.c b/block/replication.c index 826db7b304..6349d6958e 100644 --- a/block/replication.c +++ b/block/replication.c @@ -246,13 +246,14 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs, backup_cow_request_begin(&req, child->bs->job, sector_num * BDRV_SECTOR_SIZE, remaining_bytes); - ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, - qiov); + ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, + remaining_bytes, qiov, 0); backup_cow_request_end(&req); goto out; } - ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, qiov); + ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); out: return replication_return_value(s, ret); } @@ -279,8 +280,8 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, } if (ret == 0) { - ret = bdrv_co_writev(top, sector_num, - remaining_sectors, qiov); + ret = bdrv_co_pwritev(top, sector_num * BDRV_SECTOR_SIZE, + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); return replication_return_value(s, ret); } @@ -306,7 +307,8 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count); target = ret ? top : base; - ret = bdrv_co_writev(target, sector_num, n, &hd_qiov); + ret = bdrv_co_pwritev(target, sector_num * BDRV_SECTOR_SIZE, + n * BDRV_SECTOR_SIZE, &hd_qiov, 0); if (ret < 0) { goto out1; } From 3a7404b31e96156ea35be6fec938e162517e28d9 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 28 Jun 2018 15:15:24 -0500 Subject: [PATCH 28/29] vhdx: Switch to byte-based calls We are gradually moving away from sector-based interfaces, towards byte-based. Make the change for the last few sector-based calls into the block layer from the vhdx driver. Ideally, the vhdx driver should switch to doing everything byte-based, but that's a more invasive change that requires a bit more auditing. Signed-off-by: Eric Blake Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- block/vhdx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/block/vhdx.c b/block/vhdx.c index a677703a9e..4d0819750f 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1127,9 +1127,9 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, break; case PAYLOAD_BLOCK_FULLY_PRESENT: qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->file, - sinfo.file_offset >> BDRV_SECTOR_BITS, - sinfo.sectors_avail, &hd_qiov); + ret = bdrv_co_preadv(bs->file, sinfo.file_offset, + sinfo.sectors_avail * BDRV_SECTOR_SIZE, + &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto exit; @@ -1349,9 +1349,9 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, } /* block exists, so we can just overwrite it */ qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_writev(bs->file, - sinfo.file_offset >> BDRV_SECTOR_BITS, - sectors_to_write, &hd_qiov); + ret = bdrv_co_pwritev(bs->file, sinfo.file_offset, + sectors_to_write * BDRV_SECTOR_SIZE, + &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto error_bat_restore; From 583c99d39368526dfb57a715b04a6ceea27dbe1e Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 28 Jun 2018 15:15:25 -0500 Subject: [PATCH 29/29] block: Remove unused sector-based vectored I/O We are gradually moving away from sector-based interfaces, towards byte-based. Now that all callers of vectored I/O have been converted to use our preferred byte-based bdrv_co_p{read,write}v(), we can delete the unused bdrv_co_{read,write}v(). Furthermore, this gets rid of the signature difference between the public bdrv_co_writev() and the callback .bdrv_co_writev (the latter still exists, because some drivers still need more work before they are fully byte-based). Signed-off-by: Eric Blake Reviewed-by: Stefan Hajnoczi Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- block/io.c | 36 ------------------------------------ include/block/block.h | 4 ---- 2 files changed, 40 deletions(-) diff --git a/block/io.c b/block/io.c index b63822280a..7035b78a20 100644 --- a/block/io.c +++ b/block/io.c @@ -1429,24 +1429,6 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, return ret; } -static int coroutine_fn bdrv_co_do_readv(BdrvChild *child, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags) -{ - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EINVAL; - } - - return bdrv_co_preadv(child, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS, qiov, flags); -} - -int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0); -} - static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags) { @@ -1889,24 +1871,6 @@ out: return ret; } -static int coroutine_fn bdrv_co_do_writev(BdrvChild *child, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags) -{ - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EINVAL; - } - - return bdrv_co_pwritev(child, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS, qiov, flags); -} - -int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, 0); -} - int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags) { diff --git a/include/block/block.h b/include/block/block.h index 42e59ff585..2ffc1c64c6 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -285,10 +285,6 @@ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes); int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov); int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, const void *buf, int count); -int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov); -int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov); /* * Efficiently zero a region of the disk image. Note that this is a regular * I/O request like read or write and should have a reasonable size. This