From dc80ca6cd6cafdee174af3c883d85e19b7fd70a5 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jun 2014 17:58:29 +0800 Subject: [PATCH 01/23] virtio-blk: avoid qdev property definition duplication It becomes unwiedly to duplicate all virtio-blk qdev property definitions due to an #ifdef. The C preprocessor syntax makes it a little hard to resolve this cleanly but we can extract the #ifdef and call a macro it defines later. Avoiding duplication is important since it will only get worse when we move the x-data-plane qdev property here too. We'd have a combinatorial explosion since x-data-plane has its own #ifdef. Suggested-by: Peter Crosthwaite Signed-off-by: Stefan Hajnoczi Reviewed-by: Peter Crosthwaite --- include/hw/virtio/virtio-blk.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index d0fb26f963..ee43f7a24a 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -156,21 +156,19 @@ typedef struct VirtIOBlockReq { DEFINE_VIRTIO_COMMON_FEATURES(_state, _field) #ifdef __linux__ -#define DEFINE_VIRTIO_BLK_PROPERTIES(_state, _field) \ - DEFINE_BLOCK_PROPERTIES(_state, _field.conf), \ - DEFINE_BLOCK_CHS_PROPERTIES(_state, _field.conf), \ - DEFINE_PROP_STRING("serial", _state, _field.serial), \ - DEFINE_PROP_BIT("config-wce", _state, _field.config_wce, 0, true), \ - DEFINE_PROP_BIT("scsi", _state, _field.scsi, 0, true), \ - DEFINE_PROP_IOTHREAD("x-iothread", _state, _field.iothread) +#define DEFINE_VIRTIO_BLK_PROPERTIES_LINUX(_state, _field) \ + DEFINE_PROP_BIT("scsi", _state, _field.scsi, 0, true), #else +#define DEFINE_VIRTIO_BLK_PROPERTIES_LINUX(_state, _field) +#endif + #define DEFINE_VIRTIO_BLK_PROPERTIES(_state, _field) \ + DEFINE_VIRTIO_BLK_PROPERTIES_LINUX(_state, _field) \ DEFINE_BLOCK_PROPERTIES(_state, _field.conf), \ DEFINE_BLOCK_CHS_PROPERTIES(_state, _field.conf), \ DEFINE_PROP_STRING("serial", _state, _field.serial), \ DEFINE_PROP_BIT("config-wce", _state, _field.config_wce, 0, true), \ DEFINE_PROP_IOTHREAD("x-iothread", _state, _field.iothread) -#endif /* __linux__ */ void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk); From a9968c77d5a56211ae22be2e6a1dcb07f2016010 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 18 Jun 2014 17:58:30 +0800 Subject: [PATCH 02/23] dataplane: bail out on unsupported transport If the virtio transport does not support notifiers (like s390-virtio), we can't use dataplane. Bail out early and let the user know what is wrong. Signed-off-by: Cornelia Huck Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- hw/block/dataplane/virtio-blk.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 09bd2c70ab..f6e1a5d919 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -127,6 +127,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, VirtIOBlockDataPlane *s; VirtIOBlock *vblk = VIRTIO_BLK(vdev); Error *local_err = NULL; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); *dataplane = NULL; @@ -134,6 +136,14 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, return; } + /* Don't try if transport does not support notifiers. */ + if (!k->set_guest_notifiers || !k->set_host_notifier) { + error_setg(errp, + "device is incompatible with x-data-plane " + "(transport does not support notifiers)"); + return; + } + /* If dataplane is (re-)enabled while the guest is running there could be * block jobs that can conflict. */ From ee512c6f21b2f76ae923e3a4cabbd58899ff7ae1 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jun 2014 17:58:31 +0800 Subject: [PATCH 03/23] virtio-blk: move x-data-plane qdev property to virtio-blk.h Move the x-data-plane property. Originally it was outside since not every transport may wish to support dataplane. But that makes little sense when we have a dedicated CONFIG_VIRTIO_BLK_DATA_PLANE ifdef already. This move makes it easier to switch to property aliases in the next patch. Signed-off-by: Stefan Hajnoczi Reviewed-by: Peter Crosthwaite --- hw/s390x/virtio-ccw.c | 3 --- hw/virtio/virtio-pci.c | 3 --- include/hw/virtio/virtio-blk.h | 8 ++++++++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 05656a2887..d7ff0a0929 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -1403,9 +1403,6 @@ static Property virtio_ccw_blk_properties[] = { DEFINE_VIRTIO_BLK_PROPERTIES(VirtIOBlkCcw, blk), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE - DEFINE_PROP_BIT("x-data-plane", VirtIOBlkCcw, blk.data_plane, 0, false), -#endif DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 317324f23d..653d74e801 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1066,9 +1066,6 @@ static Property virtio_blk_pci_properties[] = { DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE - DEFINE_PROP_BIT("x-data-plane", VirtIOBlkPCI, blk.data_plane, 0, false), -#endif DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features), DEFINE_VIRTIO_BLK_PROPERTIES(VirtIOBlkPCI, blk), DEFINE_PROP_END_OF_LIST(), diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index ee43f7a24a..1d80bcc0dc 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -162,8 +162,16 @@ typedef struct VirtIOBlockReq { #define DEFINE_VIRTIO_BLK_PROPERTIES_LINUX(_state, _field) #endif +#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE +#define DEFINE_VIRTIO_BLK_PROPERTIES_DATA_PLANE(_state, _field) \ + DEFINE_PROP_BIT("x-data-plane", _state, _field.data_plane, 0, false), +#else +#define DEFINE_VIRTIO_BLK_PROPERTIES_DATA_PLANE(_state, _field) +#endif + #define DEFINE_VIRTIO_BLK_PROPERTIES(_state, _field) \ DEFINE_VIRTIO_BLK_PROPERTIES_LINUX(_state, _field) \ + DEFINE_VIRTIO_BLK_PROPERTIES_DATA_PLANE(_state, _field) \ DEFINE_BLOCK_PROPERTIES(_state, _field.conf), \ DEFINE_BLOCK_CHS_PROPERTIES(_state, _field.conf), \ DEFINE_PROP_STRING("serial", _state, _field.serial), \ From 67cc7e0aaca835ed68cf3bd34f4d51a21232792f Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jun 2014 17:58:32 +0800 Subject: [PATCH 04/23] qdev: add qdev_alias_all_properties() The qdev_alias_all_properties() function creates QOM alias properties for each qdev property on a DeviceState. This is useful for parent objects that wish to forward property accesses to their children. Signed-off-by: Stefan Hajnoczi Reviewed-by: Peter Crosthwaite --- hw/core/qdev.c | 21 +++++++++++++++++++++ include/hw/qdev-properties.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/hw/core/qdev.c b/hw/core/qdev.c index d1eba3cc3d..732e7294c2 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -780,6 +780,27 @@ void qdev_property_add_static(DeviceState *dev, Property *prop, } } +/* @qdev_alias_all_properties - Add alias properties to the source object for + * all qdev properties on the target DeviceState. + */ +void qdev_alias_all_properties(DeviceState *target, Object *source) +{ + ObjectClass *class; + Property *prop; + + class = object_get_class(OBJECT(target)); + do { + DeviceClass *dc = DEVICE_CLASS(class); + + for (prop = dc->props; prop && prop->name; prop++) { + object_property_add_alias(source, prop->name, + OBJECT(target), prop->name, + &error_abort); + } + class = object_class_get_parent(class); + } while (class != object_class_by_name(TYPE_DEVICE)); +} + static bool device_get_realized(Object *obj, Error **errp) { DeviceState *dev = DEVICE(obj); diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index c962b6bbaa..3726bf331b 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -193,6 +193,8 @@ void error_set_from_qdev_prop_error(Error **errp, int ret, DeviceState *dev, */ void qdev_property_add_static(DeviceState *dev, Property *prop, Error **errp); +void qdev_alias_all_properties(DeviceState *target, Object *source); + /** * @qdev_prop_set_after_realize: * @dev: device From caffdac363801cd2cf2bf01ad013a8c1e1e43800 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jun 2014 17:58:33 +0800 Subject: [PATCH 05/23] virtio-blk: use aliases instead of duplicate qdev properties virtio-blk-pci, virtio-blk-s390, and virtio-blk-ccw all duplicate the qdev properties of their VirtIOBlock child. This approach does not work well with string or pointer properties since we must be careful about leaking or double-freeing them. Use the QOM alias property to forward property accesses to the VirtIOBlock child. This way no duplication is necessary. Remember to stop calling virtio_blk_set_conf() so that we don't clobber the values already set on the VirtIOBlock instance. Signed-off-by: Stefan Hajnoczi Reviewed-by: Peter Crosthwaite --- hw/s390x/s390-virtio-bus.c | 9 +-------- hw/s390x/s390-virtio-bus.h | 1 - hw/s390x/virtio-ccw.c | 3 +-- hw/s390x/virtio-ccw.h | 1 - hw/virtio/virtio-pci.c | 3 +-- hw/virtio/virtio-pci.h | 1 - 6 files changed, 3 insertions(+), 15 deletions(-) diff --git a/hw/s390x/s390-virtio-bus.c b/hw/s390x/s390-virtio-bus.c index 7c8c81b0cc..38984ab439 100644 --- a/hw/s390x/s390-virtio-bus.c +++ b/hw/s390x/s390-virtio-bus.c @@ -167,7 +167,6 @@ static int s390_virtio_blk_init(VirtIOS390Device *s390_dev) { VirtIOBlkS390 *dev = VIRTIO_BLK_S390(s390_dev); DeviceState *vdev = DEVICE(&dev->vdev); - virtio_blk_set_conf(vdev, &(dev->blk)); qdev_set_parent_bus(vdev, BUS(&s390_dev->bus)); if (qdev_init(vdev) < 0) { return -1; @@ -180,6 +179,7 @@ static void s390_virtio_blk_instance_init(Object *obj) VirtIOBlkS390 *dev = VIRTIO_BLK_S390(obj); object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_BLK); object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + qdev_alias_all_properties(DEVICE(&dev->vdev), obj); } static int s390_virtio_serial_init(VirtIOS390Device *s390_dev) @@ -513,18 +513,11 @@ static const TypeInfo s390_virtio_net = { .class_init = s390_virtio_net_class_init, }; -static Property s390_virtio_blk_properties[] = { - DEFINE_VIRTIO_BLK_PROPERTIES(VirtIOBlkS390, blk), - DEFINE_PROP_END_OF_LIST(), -}; - static void s390_virtio_blk_class_init(ObjectClass *klass, void *data) { - DeviceClass *dc = DEVICE_CLASS(klass); VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass); k->init = s390_virtio_blk_init; - dc->props = s390_virtio_blk_properties; } static const TypeInfo s390_virtio_blk = { diff --git a/hw/s390x/s390-virtio-bus.h b/hw/s390x/s390-virtio-bus.h index ac81bd89ee..ffd0df708b 100644 --- a/hw/s390x/s390-virtio-bus.h +++ b/hw/s390x/s390-virtio-bus.h @@ -124,7 +124,6 @@ void s390_virtio_reset_idx(VirtIOS390Device *dev); typedef struct VirtIOBlkS390 { VirtIOS390Device parent_obj; VirtIOBlock vdev; - VirtIOBlkConf blk; } VirtIOBlkS390; /* virtio-scsi-s390 */ diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index d7ff0a0929..9fa6f32d93 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -800,7 +800,6 @@ static int virtio_ccw_blk_init(VirtioCcwDevice *ccw_dev) { VirtIOBlkCcw *dev = VIRTIO_BLK_CCW(ccw_dev); DeviceState *vdev = DEVICE(&dev->vdev); - virtio_blk_set_conf(vdev, &(dev->blk)); qdev_set_parent_bus(vdev, BUS(&ccw_dev->bus)); if (qdev_init(vdev) < 0) { return -1; @@ -814,6 +813,7 @@ static void virtio_ccw_blk_instance_init(Object *obj) VirtIOBlkCcw *dev = VIRTIO_BLK_CCW(obj); object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_BLK); object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + qdev_alias_all_properties(DEVICE(&dev->vdev), obj); } static int virtio_ccw_serial_init(VirtioCcwDevice *ccw_dev) @@ -1400,7 +1400,6 @@ static const TypeInfo virtio_ccw_net = { static Property virtio_ccw_blk_properties[] = { DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), DEFINE_VIRTIO_BLK_FEATURES(VirtioCcwDevice, host_features[0]), - DEFINE_VIRTIO_BLK_PROPERTIES(VirtIOBlkCcw, blk), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_END_OF_LIST(), diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h index b8b8a8abaa..5a1f16ee5d 100644 --- a/hw/s390x/virtio-ccw.h +++ b/hw/s390x/virtio-ccw.h @@ -144,7 +144,6 @@ typedef struct VHostSCSICcw { typedef struct VirtIOBlkCcw { VirtioCcwDevice parent_obj; VirtIOBlock vdev; - VirtIOBlkConf blk; } VirtIOBlkCcw; /* virtio-balloon-ccw */ diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 653d74e801..7359d8d354 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1067,7 +1067,6 @@ static Property virtio_blk_pci_properties[] = { VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features), - DEFINE_VIRTIO_BLK_PROPERTIES(VirtIOBlkPCI, blk), DEFINE_PROP_END_OF_LIST(), }; @@ -1075,7 +1074,6 @@ static int virtio_blk_pci_init(VirtIOPCIProxy *vpci_dev) { VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vdev); - virtio_blk_set_conf(vdev, &(dev->blk)); qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); if (qdev_init(vdev) < 0) { return -1; @@ -1103,6 +1101,7 @@ static void virtio_blk_pci_instance_init(Object *obj) VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(obj); object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_BLK); object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + qdev_alias_all_properties(DEVICE(&dev->vdev), obj); } static const TypeInfo virtio_blk_pci_info = { diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index dc332ae774..1cea157a47 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -131,7 +131,6 @@ struct VHostSCSIPCI { struct VirtIOBlkPCI { VirtIOPCIProxy parent_obj; VirtIOBlock vdev; - VirtIOBlkConf blk; }; /* From f7fedda84a8eb316c99a9de647c4844b862a7b38 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jun 2014 17:58:34 +0800 Subject: [PATCH 06/23] virtio-blk: drop virtio_blk_set_conf() This function is no longer used since parent objects now use child aliases to set the VirtIOBlkConf directly. Signed-off-by: Stefan Hajnoczi Reviewed-by: Peter Crosthwaite --- hw/block/virtio-blk.c | 6 ------ include/hw/virtio/virtio-blk.h | 2 -- 2 files changed, 8 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index e59ebc962d..f1a667c672 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -677,12 +677,6 @@ static const BlockDevOps virtio_block_ops = { .resize_cb = virtio_blk_resize, }; -void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk) -{ - VirtIOBlock *s = VIRTIO_BLK(dev); - memcpy(&(s->blk), blk, sizeof(struct VirtIOBlkConf)); -} - #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE /* Disable dataplane thread during live migration since it does not * update the dirty memory bitmap yet. diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 1d80bcc0dc..52e5add925 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -178,8 +178,6 @@ typedef struct VirtIOBlockReq { DEFINE_PROP_BIT("config-wce", _state, _field.config_wce, 0, true), \ DEFINE_PROP_IOTHREAD("x-iothread", _state, _field.iothread) -void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk); - int virtio_blk_handle_scsi_req(VirtIOBlock *blk, VirtQueueElement *elem); From c5d49db4469dfc0cc7f4c01dfb4ed4e8b96bdbcd Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jun 2014 17:58:35 +0800 Subject: [PATCH 07/23] virtio: fix virtio-blk child refcount in transports object_initialize() leaves the object with a refcount of 1. object_property_add_child() adds its own reference which is dropped again when the property is deleted. The upshot of this is that we always have a refcount >= 1. Upon hot unplug the virtio-blk child is not finalized! Drop our reference after the child property has been added to the parent. Signed-off-by: Stefan Hajnoczi Reviewed-by: Peter Crosthwaite --- hw/s390x/s390-virtio-bus.c | 1 + hw/s390x/virtio-ccw.c | 1 + hw/virtio/virtio-pci.c | 1 + 3 files changed, 3 insertions(+) diff --git a/hw/s390x/s390-virtio-bus.c b/hw/s390x/s390-virtio-bus.c index 38984ab439..3438a88b1e 100644 --- a/hw/s390x/s390-virtio-bus.c +++ b/hw/s390x/s390-virtio-bus.c @@ -179,6 +179,7 @@ static void s390_virtio_blk_instance_init(Object *obj) VirtIOBlkS390 *dev = VIRTIO_BLK_S390(obj); object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_BLK); object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + object_unref(OBJECT(&dev->vdev)); qdev_alias_all_properties(DEVICE(&dev->vdev), obj); } diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 9fa6f32d93..0553fea3e4 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -813,6 +813,7 @@ static void virtio_ccw_blk_instance_init(Object *obj) VirtIOBlkCcw *dev = VIRTIO_BLK_CCW(obj); object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_BLK); object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + object_unref(OBJECT(&dev->vdev)); qdev_alias_all_properties(DEVICE(&dev->vdev), obj); } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 7359d8d354..d41b864fb4 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1101,6 +1101,7 @@ static void virtio_blk_pci_instance_init(Object *obj) VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(obj); object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_BLK); object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + object_unref(OBJECT(&dev->vdev)); qdev_alias_all_properties(DEVICE(&dev->vdev), obj); } From 32a877e4059ad7fd428bdd31d3e954fed72fa21b Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jun 2014 17:58:36 +0800 Subject: [PATCH 08/23] virtio-blk: move qdev properties into virtio-blk.c There is no need to make DEFINE_VIRTIO_BLK_PROPERTIES() public. Inline it into virtio-blk.c so it cannot be used by mistake from other source files. Signed-off-by: Stefan Hajnoczi Reviewed-by: Peter Crosthwaite --- hw/block/virtio-blk.c | 12 +++++++++++- include/hw/virtio/virtio-blk.h | 23 ----------------------- 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index f1a667c672..b8d51bb360 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -785,7 +785,17 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp) } static Property virtio_blk_properties[] = { - DEFINE_VIRTIO_BLK_PROPERTIES(VirtIOBlock, blk), + DEFINE_BLOCK_PROPERTIES(VirtIOBlock, blk.conf), + DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, blk.conf), + DEFINE_PROP_STRING("serial", VirtIOBlock, blk.serial), + DEFINE_PROP_BIT("config-wce", VirtIOBlock, blk.config_wce, 0, true), + DEFINE_PROP_IOTHREAD("x-iothread", VirtIOBlock, blk.iothread), +#ifdef __linux__ + DEFINE_PROP_BIT("scsi", VirtIOBlock, blk.scsi, 0, true), +#endif +#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE + DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, blk.data_plane, 0, false), +#endif DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 52e5add925..223530e18a 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -155,29 +155,6 @@ typedef struct VirtIOBlockReq { #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \ DEFINE_VIRTIO_COMMON_FEATURES(_state, _field) -#ifdef __linux__ -#define DEFINE_VIRTIO_BLK_PROPERTIES_LINUX(_state, _field) \ - DEFINE_PROP_BIT("scsi", _state, _field.scsi, 0, true), -#else -#define DEFINE_VIRTIO_BLK_PROPERTIES_LINUX(_state, _field) -#endif - -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE -#define DEFINE_VIRTIO_BLK_PROPERTIES_DATA_PLANE(_state, _field) \ - DEFINE_PROP_BIT("x-data-plane", _state, _field.data_plane, 0, false), -#else -#define DEFINE_VIRTIO_BLK_PROPERTIES_DATA_PLANE(_state, _field) -#endif - -#define DEFINE_VIRTIO_BLK_PROPERTIES(_state, _field) \ - DEFINE_VIRTIO_BLK_PROPERTIES_LINUX(_state, _field) \ - DEFINE_VIRTIO_BLK_PROPERTIES_DATA_PLANE(_state, _field) \ - DEFINE_BLOCK_PROPERTIES(_state, _field.conf), \ - DEFINE_BLOCK_CHS_PROPERTIES(_state, _field.conf), \ - DEFINE_PROP_STRING("serial", _state, _field.serial), \ - DEFINE_PROP_BIT("config-wce", _state, _field.config_wce, 0, true), \ - DEFINE_PROP_IOTHREAD("x-iothread", _state, _field.iothread) - int virtio_blk_handle_scsi_req(VirtIOBlock *blk, VirtQueueElement *elem); From 467b3f33e9b094bf6db5a4d6e6905f077edca0fb Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 10 Jun 2014 09:03:20 +0200 Subject: [PATCH 09/23] virtio-blk: replace x-iothread with iothread link property Up until now -device virtio-blk-pci,x-iothread= was used to assign an IOThread. This was a temporary solution while we cleaned up QOM link properties. This patch switches over to a QOM link property since it is now possible to restrict the setter to unrealized instances and automatically unref the IOThread when the virtio-blk-pci device is freed. Since the "iothread" property is a QOM property and not a qdev property, we must alias it explicitly for virtio-blk-pci, as well as CCW and s390-virtio. Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 12 +++++++++++- hw/s390x/s390-virtio-bus.c | 2 ++ hw/s390x/virtio-ccw.c | 2 ++ hw/virtio/virtio-pci.c | 2 ++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index b8d51bb360..aec3146755 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -784,12 +784,21 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp) virtio_cleanup(vdev); } +static void virtio_blk_instance_init(Object *obj) +{ + VirtIOBlock *s = VIRTIO_BLK(obj); + + object_property_add_link(obj, "iothread", TYPE_IOTHREAD, + (Object **)&s->blk.iothread, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL); +} + static Property virtio_blk_properties[] = { DEFINE_BLOCK_PROPERTIES(VirtIOBlock, blk.conf), DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, blk.conf), DEFINE_PROP_STRING("serial", VirtIOBlock, blk.serial), DEFINE_PROP_BIT("config-wce", VirtIOBlock, blk.config_wce, 0, true), - DEFINE_PROP_IOTHREAD("x-iothread", VirtIOBlock, blk.iothread), #ifdef __linux__ DEFINE_PROP_BIT("scsi", VirtIOBlock, blk.scsi, 0, true), #endif @@ -821,6 +830,7 @@ static const TypeInfo virtio_device_info = { .name = TYPE_VIRTIO_BLK, .parent = TYPE_VIRTIO_DEVICE, .instance_size = sizeof(VirtIOBlock), + .instance_init = virtio_blk_instance_init, .class_init = virtio_blk_class_init, }; diff --git a/hw/s390x/s390-virtio-bus.c b/hw/s390x/s390-virtio-bus.c index 3438a88b1e..c0dc3658c7 100644 --- a/hw/s390x/s390-virtio-bus.c +++ b/hw/s390x/s390-virtio-bus.c @@ -181,6 +181,8 @@ static void s390_virtio_blk_instance_init(Object *obj) object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); object_unref(OBJECT(&dev->vdev)); qdev_alias_all_properties(DEVICE(&dev->vdev), obj); + object_property_add_alias(obj, "iothread", OBJECT(&dev->vdev),"iothread", + &error_abort); } static int s390_virtio_serial_init(VirtIOS390Device *s390_dev) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 0553fea3e4..c2799685f2 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -815,6 +815,8 @@ static void virtio_ccw_blk_instance_init(Object *obj) object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); object_unref(OBJECT(&dev->vdev)); qdev_alias_all_properties(DEVICE(&dev->vdev), obj); + object_property_add_alias(obj, "iothread", OBJECT(&dev->vdev),"iothread", + &error_abort); } static int virtio_ccw_serial_init(VirtioCcwDevice *ccw_dev) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index d41b864fb4..3c42cda82b 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1103,6 +1103,8 @@ static void virtio_blk_pci_instance_init(Object *obj) object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); object_unref(OBJECT(&dev->vdev)); qdev_alias_all_properties(DEVICE(&dev->vdev), obj); + object_property_add_alias(obj, "iothread", OBJECT(&dev->vdev),"iothread", + &error_abort); } static const TypeInfo virtio_blk_pci_info = { From 1351d1ec89eabebc9fdff20451a62c413d7accc1 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 10 Jun 2014 09:03:21 +0200 Subject: [PATCH 10/23] qdev: drop iothread property type The iothread property type is no longer used and can be removed. Signed-off-by: Stefan Hajnoczi --- hw/core/qdev-properties-system.c | 50 -------------------------------- include/hw/qdev-properties.h | 3 -- 2 files changed, 53 deletions(-) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 52c2f8afa5..8e140af46f 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -385,56 +385,6 @@ void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd) nd->instantiated = 1; } -/* --- iothread --- */ - -static char *print_iothread(void *ptr) -{ - return iothread_get_id(ptr); -} - -static int parse_iothread(DeviceState *dev, const char *str, void **ptr) -{ - IOThread *iothread; - - iothread = iothread_find(str); - if (!iothread) { - return -ENOENT; - } - object_ref(OBJECT(iothread)); - *ptr = iothread; - return 0; -} - -static void get_iothread(Object *obj, struct Visitor *v, void *opaque, - const char *name, Error **errp) -{ - get_pointer(obj, v, opaque, print_iothread, name, errp); -} - -static void set_iothread(Object *obj, struct Visitor *v, void *opaque, - const char *name, Error **errp) -{ - set_pointer(obj, v, opaque, parse_iothread, name, errp); -} - -static void release_iothread(Object *obj, const char *name, void *opaque) -{ - DeviceState *dev = DEVICE(obj); - Property *prop = opaque; - IOThread **ptr = qdev_get_prop_ptr(dev, prop); - - if (*ptr) { - object_unref(OBJECT(*ptr)); - } -} - -PropertyInfo qdev_prop_iothread = { - .name = "iothread", - .get = get_iothread, - .set = set_iothread, - .release = release_iothread, -}; - static int qdev_add_one_global(QemuOpts *opts, void *opaque) { GlobalProperty *g; diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 3726bf331b..77fe3a12b7 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -22,7 +22,6 @@ extern PropertyInfo qdev_prop_bios_chs_trans; extern PropertyInfo qdev_prop_drive; extern PropertyInfo qdev_prop_netdev; extern PropertyInfo qdev_prop_vlan; -extern PropertyInfo qdev_prop_iothread; extern PropertyInfo qdev_prop_pci_devfn; extern PropertyInfo qdev_prop_blocksize; extern PropertyInfo qdev_prop_pci_host_devaddr; @@ -143,8 +142,6 @@ extern PropertyInfo qdev_prop_arraylen; DEFINE_PROP(_n, _s, _f, qdev_prop_vlan, NICPeers) #define DEFINE_PROP_DRIVE(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockDriverState *) -#define DEFINE_PROP_IOTHREAD(_n, _s, _f) \ - DEFINE_PROP(_n, _s, _f, qdev_prop_iothread, IOThread *) #define DEFINE_PROP_MACADDR(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr) #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ From 8698e110f8aa1cdf8ba1afdda8f2658333a4ab01 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 10 Jun 2014 09:03:22 +0200 Subject: [PATCH 11/23] virtio-blk: remove need for explicit x-data-plane=on option The x-data-plane=on|off option is no longer useful because the iothread= option conveys the same information plus which IOThread to use. Do not delete x-data-plane=on|off yet as a convenience to people using this legacy experimental option. We will drop it in QEMU 2.2. Instead, turn on data-plane when either x-data-plane=on or iothread= are used. The following command-line uses data-plane: qemu -device virtio-blk-pci,iothread=foo,drive=drive0 Signed-off-by: Stefan Hajnoczi --- hw/block/dataplane/virtio-blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index f6e1a5d919..4c5ba18122 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -132,7 +132,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, *dataplane = NULL; - if (!blk->data_plane) { + if (!blk->data_plane && !blk->iothread) { return; } From 4ab1559085d688dddf4de77f1ead3102e243e668 Mon Sep 17 00:00:00 2001 From: Chunyan Liu Date: Mon, 30 Jun 2014 14:29:58 +0800 Subject: [PATCH 12/23] qemu-img create: add 'nocow' option Add 'nocow' option so that users could have a chance to set NOCOW flag to newly created files. It's useful on btrfs file system to enhance performance. Btrfs has low performance when hosting VM images, even more when the guest in those VM are also using btrfs as file system. One way to mitigate this bad performance is to turn off COW attributes on VM files. Generally, there are two ways to turn off NOCOW on btrfs: a) by mounting fs with nodatacow, then all newly created files will be NOCOW. b) per file. Add the NOCOW file attribute. It could only be done to empty or new files. This patch tries the second way, according to the option, it could add NOCOW per file. For most block drivers, since the create file step is in raw-posix.c, so we can do setting NOCOW flag ioctl in raw-posix.c only. But there are some exceptions, like block/vpc.c and block/vdi.c, they are creating file by calling qemu_open directly. For them, do the same setting NOCOW flag ioctl work in them separately. [Fixed up 082.out due to the new 'nocow' creation option --Stefan] Signed-off-by: Chunyan Liu Signed-off-by: Stefan Hajnoczi --- block/qed.c | 6 +++--- block/raw-posix.c | 25 +++++++++++++++++++++++++ block/vdi.c | 29 +++++++++++++++++++++++++++++ block/vmdk.c | 6 +++--- block/vpc.c | 29 +++++++++++++++++++++++++++++ include/block/block_int.h | 1 + qemu-doc.texi | 16 ++++++++++++++++ qemu-img.texi | 16 ++++++++++++++++ tests/qemu-iotests/082.out | 24 ++++++++++++++++++++++++ 9 files changed, 146 insertions(+), 6 deletions(-) diff --git a/block/qed.c b/block/qed.c index eddae929eb..b69374b6a2 100644 --- a/block/qed.c +++ b/block/qed.c @@ -567,7 +567,7 @@ static void bdrv_qed_close(BlockDriverState *bs) static int qed_create(const char *filename, uint32_t cluster_size, uint64_t image_size, uint32_t table_size, const char *backing_file, const char *backing_fmt, - Error **errp) + QemuOpts *opts, Error **errp) { QEDHeader header = { .magic = QED_MAGIC, @@ -586,7 +586,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, int ret = 0; BlockDriverState *bs; - ret = bdrv_create_file(filename, NULL, &local_err); + ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { error_propagate(errp, local_err); return ret; @@ -682,7 +682,7 @@ static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp) } ret = qed_create(filename, cluster_size, image_size, table_size, - backing_file, backing_fmt, errp); + backing_file, backing_fmt, opts, errp); finish: g_free(backing_file); diff --git a/block/raw-posix.c b/block/raw-posix.c index dacf4fbbc8..825a0c878f 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -55,6 +55,9 @@ #include #include #include +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif #endif #ifdef CONFIG_FIEMAP #include @@ -1278,12 +1281,14 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) int fd; int result = 0; int64_t total_size = 0; + bool nocow = false; strstart(filename, "file:", &filename); /* Read out options */ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE; + nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false); fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); @@ -1291,6 +1296,21 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) result = -errno; error_setg_errno(errp, -result, "Could not create file"); } else { + if (nocow) { +#ifdef __linux__ + /* Set NOCOW flag to solve performance issue on fs like btrfs. + * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value + * will be ignored since any failure of this operation should not + * block the left work. + */ + int attr; + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { + attr |= FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &attr); + } +#endif + } + if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { result = -errno; error_setg_errno(errp, -result, "Could not resize file"); @@ -1477,6 +1497,11 @@ static QemuOptsList raw_create_opts = { .type = QEMU_OPT_SIZE, .help = "Virtual disk size" }, + { + .name = BLOCK_OPT_NOCOW, + .type = QEMU_OPT_BOOL, + .help = "Turn off copy-on-write (valid only on btrfs)" + }, { /* end of list */ } } }; diff --git a/block/vdi.c b/block/vdi.c index 01fe22ebe8..197bd77c97 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -53,6 +53,13 @@ #include "block/block_int.h" #include "qemu/module.h" #include "migration/migration.h" +#ifdef __linux__ +#include +#include +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif +#endif #if defined(CONFIG_UUID) #include @@ -683,6 +690,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) VdiHeader header; size_t i; size_t bmap_size; + bool nocow = false; logout("\n"); @@ -699,6 +707,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) image_type = VDI_TYPE_STATIC; } #endif + nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false); if (bytes > VDI_DISK_SIZE_MAX) { result = -ENOTSUP; @@ -716,6 +725,21 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } + if (nocow) { +#ifdef __linux__ + /* Set NOCOW flag to solve performance issue on fs like btrfs. + * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will + * be ignored since any failure of this operation should not block the + * left work. + */ + int attr; + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { + attr |= FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &attr); + } +#endif + } + /* We need enough blocks to store the given disk size, so always round up. */ blocks = (bytes + block_size - 1) / block_size; @@ -818,6 +842,11 @@ static QemuOptsList vdi_create_opts = { .def_value_str = "off" }, #endif + { + .name = BLOCK_OPT_NOCOW, + .type = QEMU_OPT_BOOL, + .help = "Turn off copy-on-write (valid only on btrfs)" + }, /* TODO: An additional option to set UUID values might be useful. */ { /* end of list */ } } diff --git a/block/vmdk.c b/block/vmdk.c index d0de0193fc..27a78daa02 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1529,7 +1529,7 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, static int vmdk_create_extent(const char *filename, int64_t filesize, bool flat, bool compress, bool zeroed_grain, - Error **errp) + QemuOpts *opts, Error **errp) { int ret, i; BlockDriverState *bs = NULL; @@ -1539,7 +1539,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, uint32_t *gd_buf = NULL; int gd_buf_size; - ret = bdrv_create_file(filename, NULL, &local_err); + ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; @@ -1845,7 +1845,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) path, desc_filename); if (vmdk_create_extent(ext_filename, size, - flat, compress, zeroed_grain, errp)) { + flat, compress, zeroed_grain, opts, errp)) { ret = -EINVAL; goto exit; } diff --git a/block/vpc.c b/block/vpc.c index 798d8540db..8b376a40be 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -29,6 +29,13 @@ #if defined(CONFIG_UUID) #include #endif +#ifdef __linux__ +#include +#include +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif +#endif /**************************************************************/ @@ -751,6 +758,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) int64_t total_size; int disk_type; int ret = -EIO; + bool nocow = false; /* Read out options */ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); @@ -767,6 +775,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } else { disk_type = VHD_DYNAMIC; } + nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false); /* Create the file */ fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); @@ -775,6 +784,21 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) goto out; } + if (nocow) { +#ifdef __linux__ + /* Set NOCOW flag to solve performance issue on fs like btrfs. + * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will + * be ignored since any failure of this operation should not block the + * left work. + */ + int attr; + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { + attr |= FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &attr); + } +#endif + } + /* * Calculate matching total_size and geometry. Increase the number of * sectors requested until we get enough (or fail). This ensures that @@ -884,6 +908,11 @@ static QemuOptsList vpc_create_opts = { "Type of virtual hard disk format. Supported formats are " "{dynamic (default) | fixed} " }, + { + .name = BLOCK_OPT_NOCOW, + .type = QEMU_OPT_BOOL, + .help = "Turn off copy-on-write (valid only on btrfs)" + }, { /* end of list */ } } }; diff --git a/include/block/block_int.h b/include/block/block_int.h index 53e77cf11e..eaf6e313d5 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -54,6 +54,7 @@ #define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" #define BLOCK_OPT_REDUNDANCY "redundancy" +#define BLOCK_OPT_NOCOW "nocow" typedef struct BdrvTrackedRequest { BlockDriverState *bs; diff --git a/qemu-doc.texi b/qemu-doc.texi index 88ec9bb133..ad92c85cba 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -589,6 +589,22 @@ check -r all} is required, which may take some time. This option can only be enabled if @code{compat=1.1} is specified. +@item nocow +If this option is set to @code{on}, it will trun off COW of the file. It's only +valid on btrfs, no effect on other file systems. + +Btrfs has low performance when hosting a VM image file, even more when the guest +on the VM also using btrfs as file system. Turning off COW is a way to mitigate +this bad performance. Generally there are two ways to turn off COW on btrfs: +a) Disable it by mounting with nodatacow, then all newly created files will be +NOCOW. b) For an empty file, add the NOCOW file attribute. That's what this option +does. + +Note: this option is only valid to new or empty files. If there is an existing +file which is COW and has data blocks already, it couldn't be changed to NOCOW +by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if +the NOCOW flag is set or not (Capitabl 'C' is NOCOW flag). + @end table @item qed diff --git a/qemu-img.texi b/qemu-img.texi index c68b54148a..8496f3b8dc 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -474,6 +474,22 @@ check -r all} is required, which may take some time. This option can only be enabled if @code{compat=1.1} is specified. +@item nocow +If this option is set to @code{on}, it will trun off COW of the file. It's only +valid on btrfs, no effect on other file systems. + +Btrfs has low performance when hosting a VM image file, even more when the guest +on the VM also using btrfs as file system. Turning off COW is a way to mitigate +this bad performance. Generally there are two ways to turn off COW on btrfs: +a) Disable it by mounting with nodatacow, then all newly created files will be +NOCOW. b) For an empty file, add the NOCOW file attribute. That's what this option +does. + +Note: this option is only valid to new or empty files. If there is an existing +file which is COW and has data blocks already, it couldn't be changed to NOCOW +by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if +the NOCOW flag is set or not (Capitabl 'C' is NOCOW flag). + @end table @item Other diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out index 28309a0327..413e7ef391 100644 --- a/tests/qemu-iotests/082.out +++ b/tests/qemu-iotests/082.out @@ -66,6 +66,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o ? TEST_DIR/t.qcow2 128M Supported options: @@ -77,6 +78,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 128M Supported options: @@ -88,6 +90,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2 128M Supported options: @@ -99,6 +102,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 128M Supported options: @@ -110,6 +114,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 128M Supported options: @@ -121,6 +126,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 128M Supported options: @@ -132,6 +138,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k -o ? TEST_DIR/t.qcow2 128M Supported options: @@ -143,6 +150,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 128M Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2,help' encryption=off cluster_size=65536 lazy_refcounts=off @@ -247,6 +255,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o ? TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -258,6 +267,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -269,6 +279,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -280,6 +291,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -291,6 +303,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -302,6 +315,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -313,6 +327,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k -o ? TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -324,6 +339,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2.base': Could not open backing file: Could not open 'TEST_DIR/t.qcow2,help': No such file or directory @@ -417,6 +433,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o ? TEST_DIR/t.qcow2 Supported options: @@ -428,6 +445,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 Supported options: @@ -439,6 +457,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2 Supported options: @@ -450,6 +469,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 Supported options: @@ -461,6 +481,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 Supported options: @@ -472,6 +493,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 Supported options: @@ -483,6 +505,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k -o ? TEST_DIR/t.qcow2 Supported options: @@ -494,6 +517,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 From 214a081a0dd44e69f7db4a741880a8ae9d174fa2 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 27 Jun 2014 22:47:46 +0200 Subject: [PATCH 13/23] iotests: Simplify qemu-iotests-quick.sh As of the "iotests: Allow out-of-tree run" series, the qemu-iotests may (and should) be run directly in the build tree and will then guess the binary paths themselves. Therefore, qemu-iotests-quick.sh does not need to (and should not) enter the source path anymore; also, it does not need to specify the binaries because "check" will guess them automatically. As a side-effect, tests using qemu may now be added to the quick group. Signed-off-by: Max Reitz Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests-quick.sh | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh index c449e8ab4a..8a9a4c68e9 100755 --- a/tests/qemu-iotests-quick.sh +++ b/tests/qemu-iotests-quick.sh @@ -1,16 +1,6 @@ #!/bin/sh -# We don't know which of the system emulator binaries there is (or if there is -# any at all), so the 'quick' group doesn't contain any tests that require -# running qemu proper. Assign a fake binary name so that qemu-iotests doesn't -# complain about the missing binary. -export QEMU_PROG="this_should_be_unused" - -export QEMU_IMG_PROG="$(pwd)/qemu-img" -export QEMU_IO_PROG="$(pwd)/qemu-io" -export QEMU_NBD_PROG="$(pwd)/qemu-nbd" - -cd $SRC_PATH/tests/qemu-iotests +cd tests/qemu-iotests ret=0 ./check -T -nocache -qcow2 -g quick || ret=1 From ee9dd1fc901f7098a1c2af5a2202bd7f41124dbc Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 27 Jun 2014 22:47:47 +0200 Subject: [PATCH 14/23] iotests: Add qemu tests to quick group Now that qemu-iotests-quick.sh supports tests using the qemu binary, we are free to add such tests to the quick group. Signed-off-by: Max Reitz Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/group | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index e3dc4e8754..7a2bfcafdc 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -51,7 +51,7 @@ 042 rw auto quick 043 rw auto backing 044 rw auto -045 rw auto +045 rw auto quick 046 rw auto aio 047 rw auto 048 img auto quick @@ -71,13 +71,13 @@ 062 rw auto quick 063 rw auto quick 064 rw auto quick -065 rw auto +065 rw auto quick 066 rw auto quick -067 rw auto -068 rw auto +067 rw auto quick +068 rw auto quick 069 rw auto quick 070 rw auto quick -071 rw auto +071 rw auto quick 072 rw auto quick 073 rw auto quick 074 rw auto quick @@ -87,16 +87,16 @@ 078 rw auto 079 rw auto 080 rw auto -081 rw auto +081 rw auto quick 082 rw auto quick 083 rw auto 084 img auto 085 rw auto 086 rw auto quick -087 rw auto +087 rw auto quick 088 rw auto 089 rw auto quick 090 rw auto quick -091 rw auto +091 rw auto quick 092 rw auto quick -095 rw auto +095 rw auto quick From c891e3bbc5bc3d74ad0ba896852a9796ceb147c2 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 27 Jun 2014 22:47:48 +0200 Subject: [PATCH 15/23] iotests: Add more tests to quick group While at it, add some more tests to the quick group (those that run with -nocache in under three seconds on my HDD). Signed-off-by: Max Reitz Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/group | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 7a2bfcafdc..6e67f61262 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -7,16 +7,16 @@ # # test-group association ... one line per test # -001 rw auto +001 rw auto quick 002 rw auto quick 003 rw auto 004 rw auto quick -005 img auto +005 img auto quick 006 img auto 007 snapshot auto -008 rw auto -009 rw auto -010 rw auto +008 rw auto quick +009 rw auto quick +010 rw auto quick 011 rw auto quick 012 auto quick 013 rw auto @@ -24,36 +24,36 @@ 015 rw snapshot auto 016 rw auto quick 017 rw backing auto quick -018 rw backing auto +018 rw backing auto quick 019 rw backing auto quick 020 rw backing auto quick -021 io auto +021 io auto quick 022 rw snapshot auto 023 rw auto 024 rw backing auto quick 025 rw auto quick 026 rw blkdbg auto 027 rw auto quick -028 rw backing auto +028 rw backing auto quick 029 rw auto quick 030 rw auto backing 031 rw auto quick -032 rw auto +032 rw auto quick 033 rw auto quick -034 rw auto backing +034 rw auto backing quick 035 rw auto quick 036 rw auto quick -037 rw auto backing -038 rw auto backing -039 rw auto +037 rw auto backing quick +038 rw auto backing quick +039 rw auto quick 040 rw auto 041 rw auto backing 042 rw auto quick 043 rw auto backing 044 rw auto 045 rw auto quick -046 rw auto aio -047 rw auto +046 rw auto aio quick +047 rw auto quick 048 img auto quick 049 rw auto 050 rw auto backing quick @@ -81,20 +81,20 @@ 072 rw auto quick 073 rw auto quick 074 rw auto quick -075 rw auto +075 rw auto quick 076 auto 077 rw auto quick -078 rw auto +078 rw auto quick 079 rw auto 080 rw auto 081 rw auto quick 082 rw auto quick 083 rw auto -084 img auto +084 img auto quick 085 rw auto 086 rw auto quick 087 rw auto quick -088 rw auto +088 rw auto quick 089 rw auto quick 090 rw auto quick 091 rw auto quick From 7676e2c597000eff3a7233b40cca768b358f9bc9 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Mon, 30 Jun 2014 15:14:15 +0200 Subject: [PATCH 16/23] block: make 'top' argument to block-commit optional Now that active layer block-commit is supported, the 'top' argument no longer needs to be mandatory. Change it to optional, with the default being the active layer in the device chain. [kwolf: Rebased and resolved conflict in tests/qemu-iotests/040] Reviewed-by: Eric Blake Reviewed-by: Benoit Canet Signed-off-by: Jeff Cody Signed-off-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- blockdev.c | 16 ++++++++++++++-- qapi/block-core.json | 7 ++++--- qmp-commands.hx | 5 +++-- tests/qemu-iotests/040 | 28 ++++++++++++++++++---------- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/blockdev.c b/blockdev.c index 69b7c2a8c5..79ce52b58b 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1913,7 +1913,8 @@ void qmp_block_stream(const char *device, bool has_base, } void qmp_block_commit(const char *device, - bool has_base, const char *base, const char *top, + bool has_base, const char *base, + bool has_top, const char *top, bool has_speed, int64_t speed, Error **errp) { @@ -1932,6 +1933,11 @@ void qmp_block_commit(const char *device, /* drain all i/o before commits */ bdrv_drain_all(); + /* Important Note: + * libvirt relies on the DeviceNotFound error class in order to probe for + * live commit feature versions; for this to work, we must make sure to + * perform the device lookup before any generic errors that may occur in a + * scenario in which all optional arguments are omitted. */ bs = bdrv_find(device); if (!bs) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); @@ -1945,7 +1951,7 @@ void qmp_block_commit(const char *device, /* default top_bs is the active layer */ top_bs = bs; - if (top) { + if (has_top && top) { if (strcmp(bs->filename, top) != 0) { top_bs = bdrv_find_backing_image(bs, top); } @@ -1967,6 +1973,12 @@ void qmp_block_commit(const char *device, return; } + /* Do not allow attempts to commit an image into itself */ + if (top_bs == base_bs) { + error_setg(errp, "cannot commit an image into itself"); + return; + } + if (top_bs == bs) { commit_active_start(bs, base_bs, speed, on_error, block_job_cb, bs, &local_err); diff --git a/qapi/block-core.json b/qapi/block-core.json index faf394cc76..6a697f1458 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -690,8 +690,9 @@ # @base: #optional The file name of the backing image to write data into. # If not specified, this is the deepest backing image # -# @top: The file name of the backing image within the image chain, -# which contains the topmost data to be committed down. +# @top: #optional The file name of the backing image within the image chain, +# which contains the topmost data to be committed down. If +# not specified, this is the active layer. # # If top == base, that is an error. # If top == active, the job will not be completed by itself, @@ -719,7 +720,7 @@ # ## { 'command': 'block-commit', - 'data': { 'device': 'str', '*base': 'str', 'top': 'str', + 'data': { 'device': 'str', '*base': 'str', '*top': 'str', '*speed': 'int' } } ## diff --git a/qmp-commands.hx b/qmp-commands.hx index 1ea18b22a3..8c5fdb5ca1 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -985,7 +985,7 @@ EQMP { .name = "block-commit", - .args_type = "device:B,base:s?,top:s,speed:o?", + .args_type = "device:B,base:s?,top:s?,speed:o?", .mhandler.cmd_new = qmp_marshal_input_block_commit, }, @@ -1003,7 +1003,8 @@ Arguments: If not specified, this is the deepest backing image (json-string, optional) - "top": The file name of the backing image within the image chain, - which contains the topmost data to be committed down. + which contains the topmost data to be committed down. If + not specified, this is the active layer. (json-string, optional) If top == base, that is an error. If top == active, the job will not be completed by itself, diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index d1668109dd..f1e16c11c7 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -35,11 +35,7 @@ test_img = os.path.join(iotests.test_dir, 'test.img') class ImageCommitTestCase(iotests.QMPTestCase): '''Abstract base class for image commit test cases''' - def run_commit_test(self, top, base, need_ready=False): - self.assert_no_active_block_jobs() - result = self.vm.qmp('block-commit', device='drive0', top=top, base=base) - self.assert_qmp(result, 'return', {}) - + def wait_for_complete(self, need_ready=False): completed = False ready = False while not completed: @@ -62,6 +58,18 @@ class ImageCommitTestCase(iotests.QMPTestCase): self.assert_no_active_block_jobs() self.vm.shutdown() + def run_commit_test(self, top, base, need_ready=False): + self.assert_no_active_block_jobs() + result = self.vm.qmp('block-commit', device='drive0', top=top, base=base) + self.assert_qmp(result, 'return', {}) + self.wait_for_complete(need_ready) + + def run_default_commit_test(self): + self.assert_no_active_block_jobs() + result = self.vm.qmp('block-commit', device='drive0') + self.assert_qmp(result, 'return', {}) + self.wait_for_complete() + class TestSingleDrive(ImageCommitTestCase): image_len = 1 * 1024 * 1024 test_len = 1 * 1024 * 256 @@ -113,17 +121,17 @@ class TestSingleDrive(ImageCommitTestCase): self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) + def test_top_is_default_active(self): + self.run_default_commit_test() + self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) + self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) + def test_top_and_base_reversed(self): self.assert_no_active_block_jobs() result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img) self.assert_qmp(result, 'error/class', 'GenericError') self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % mid_img) - def test_top_omitted(self): - self.assert_no_active_block_jobs() - result = self.vm.qmp('block-commit', device='drive0') - self.assert_qmp(result, 'error/class', 'GenericError') - self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing") class TestRelativePaths(ImageCommitTestCase): image_len = 1 * 1024 * 1024 From 4caf0fcd45db46920a6264b1621ae6adc772ef19 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 25 Jun 2014 15:35:26 -0400 Subject: [PATCH 17/23] block: simplify bdrv_find_base() and bdrv_find_overlay() This simplifies the function bdrv_find_overlay(). With this change, bdrv_find_base() is just a subset of usage of bdrv_find_overlay(), so this also takes advantage of that. Reviewed-by: Eric Blake Reviewed-by: Benoit Canet Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block.c | 45 ++++++++++----------------------------------- 1 file changed, 10 insertions(+), 35 deletions(-) diff --git a/block.c b/block.c index 6856c18aa8..c111c297f7 100644 --- a/block.c +++ b/block.c @@ -2508,32 +2508,23 @@ int bdrv_change_backing_file(BlockDriverState *bs, * * Returns NULL if bs is not found in active's image chain, * or if active == bs. + * + * Returns the bottommost base image if bs == NULL. */ BlockDriverState *bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bs) { - BlockDriverState *overlay = NULL; - BlockDriverState *intermediate; - - assert(active != NULL); - assert(bs != NULL); - - /* if bs is the same as active, then by definition it has no overlay - */ - if (active == bs) { - return NULL; + while (active && bs != active->backing_hd) { + active = active->backing_hd; } - intermediate = active; - while (intermediate->backing_hd) { - if (intermediate->backing_hd == bs) { - overlay = intermediate; - break; - } - intermediate = intermediate->backing_hd; - } + return active; +} - return overlay; +/* Given a BDS, searches for the base layer. */ +BlockDriverState *bdrv_find_base(BlockDriverState *bs) +{ + return bdrv_find_overlay(bs, NULL); } typedef struct BlkIntermediateStates { @@ -4326,22 +4317,6 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs) return 1 + bdrv_get_backing_file_depth(bs->backing_hd); } -BlockDriverState *bdrv_find_base(BlockDriverState *bs) -{ - BlockDriverState *curr_bs = NULL; - - if (!bs) { - return NULL; - } - - curr_bs = bs; - - while (curr_bs->backing_hd) { - curr_bs = curr_bs->backing_hd; - } - return curr_bs; -} - /**************************************************************/ /* async I/Os */ From 6764579f894950afe87d8ec3b323adde8925d4fd Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 30 Jun 2014 19:03:37 +0100 Subject: [PATCH 18/23] block/cow: Avoid use of uninitialized cow_bs in error path Commit 25814e8987 introduced an error-exit code path which does a "goto exit" before the cow_bs variable is initialized, meaning we would call bdrv_unref() on an uninitialized variable and likely segfault. Fix this by moving the NULL-initialization to the top of the function and making the exit code path handle the case where it is NULL. Signed-off-by: Peter Maydell Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- block/cow.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/block/cow.c b/block/cow.c index 8f81ee6d56..6ee483327f 100644 --- a/block/cow.c +++ b/block/cow.c @@ -332,7 +332,7 @@ static int cow_create(const char *filename, QemuOpts *opts, Error **errp) char *image_filename = NULL; Error *local_err = NULL; int ret; - BlockDriverState *cow_bs; + BlockDriverState *cow_bs = NULL; /* Read out options */ image_sectors = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512; @@ -344,7 +344,6 @@ static int cow_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } - cow_bs = NULL; ret = bdrv_open(&cow_bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err); if (ret < 0) { @@ -383,7 +382,9 @@ static int cow_create(const char *filename, QemuOpts *opts, Error **errp) exit: g_free(image_filename); - bdrv_unref(cow_bs); + if (cow_bs) { + bdrv_unref(cow_bs); + } return ret; } From 4e855baabfb548067b618808c32dc413d40e0bc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Canet?= Date: Mon, 30 Jun 2014 17:05:41 +0200 Subject: [PATCH 19/23] qapi: Change back sector-count to sectors-count in quorum QAPI events. fe069d9d had aligned code and documentation while dropping the s from the actual JSON output. Fix that. This also fix test/qemu-iotest/081 since the missing s was causing a permutation. Signed-off-by: Benoit Canet Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- docs/qmp/qmp-events.txt | 26 +++++++++++++------------- qapi/event.json | 8 ++++---- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt index 44be891261..4a6c2a2c45 100644 --- a/docs/qmp/qmp-events.txt +++ b/docs/qmp/qmp-events.txt @@ -250,14 +250,14 @@ Emitted by the Quorum block driver if it fails to establish a quorum. Data: -- "reference": device name if defined else node name. -- "sector-num": Number of the first sector of the failed read operation. -- "sector-count": Failed read operation sector count. +- "reference": device name if defined else node name. +- "sector-num": Number of the first sector of the failed read operation. +- "sectors-count": Failed read operation sector count. Example: { "event": "QUORUM_FAILURE", - "data": { "reference": "usr1", "sector-num": 345435, "sector-count": 5 }, + "data": { "reference": "usr1", "sector-num": 345435, "sectors-count": 5 }, "timestamp": { "seconds": 1344522075, "microseconds": 745528 } } QUORUM_REPORT_BAD @@ -267,19 +267,19 @@ Emitted to report a corruption of a Quorum file. Data: -- "error": Error message (json-string, optional) - Only present on failure. This field contains a human-readable - error message. There are no semantics other than that the - block layer reported an error and clients should not try to - interpret the error string. -- "node-name": The graph node name of the block driver state. -- "sector-num": Number of the first sector of the failed read operation. -- "sector-count": Failed read operation sector count. +- "error": Error message (json-string, optional) + Only present on failure. This field contains a human-readable + error message. There are no semantics other than that the + block layer reported an error and clients should not try to + interpret the error string. +- "node-name": The graph node name of the block driver state. +- "sector-num": Number of the first sector of the failed read operation. +- "sectors-count": Failed read operation sector count. Example: { "event": "QUORUM_REPORT_BAD", - "data": { "node-name": "1.raw", "sector-num": 345435, "sector-count": 5 }, + "data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 }, "timestamp": { "seconds": 1344522075, "microseconds": 745528 } } RESET diff --git a/qapi/event.json b/qapi/event.json index ff97aeb377..c51dc491e0 100644 --- a/qapi/event.json +++ b/qapi/event.json @@ -288,12 +288,12 @@ # # @sector-num: number of the first sector of the failed read operation # -# @sector-count: failed read operation sector count +# @sectors-count: failed read operation sector count # # Since: 2.0 ## { 'event': 'QUORUM_FAILURE', - 'data': { 'reference': 'str', 'sector-num': 'int', 'sector-count': 'int' } } + 'data': { 'reference': 'str', 'sector-num': 'int', 'sectors-count': 'int' } } ## # @QUORUM_REPORT_BAD @@ -309,13 +309,13 @@ # # @sector-num: number of the first sector of the failed read operation # -# @sector-count: failed read operation sector count +# @sectors-count: failed read operation sector count # # Since: 2.0 ## { 'event': 'QUORUM_REPORT_BAD', 'data': { '*error': 'str', 'node-name': 'str', - 'sector-num': 'int', 'sector-count': 'int' } } + 'sector-num': 'int', 'sectors-count': 'int' } } ## # @VSERPORT_CHANGE From fa40e65622352012dccd435147f28161375a6815 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Tue, 1 Jul 2014 09:52:16 +0200 Subject: [PATCH 20/23] block: add QAPI command to allow live backing file change This allows a user to make a live change to the backing file recorded in an open image. The image file to modify can be specified 2 ways: 1) image filename 2) image node-name Note: this does not cause the backing file itself to be reopened; it merely changes the backing filename in the image file structure, and in internal BDS structures. It is the responsibility of the user to pass a filename string that can be resolved when the image chain is reopened, and the filename string is not validated. A good analogy for this command is that it is a live version of 'qemu-img rebase -u', with respect to changing the backing file string. [Jeff is offline so I respun this patch in his absence. Dropped image filename since using node-name is preferred and this is a new command. No need to introduce the limitations of finding images by filename. --Stefan] Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- blockdev.c | 79 ++++++++++++++++++++++++++++++++++++++++++++ qapi/block-core.json | 26 +++++++++++++++ qmp-commands.hx | 39 ++++++++++++++++++++++ 3 files changed, 144 insertions(+) diff --git a/blockdev.c b/blockdev.c index 79ce52b58b..57373d3661 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2367,6 +2367,85 @@ void qmp_block_job_complete(const char *device, Error **errp) block_job_complete(job, errp); } +void qmp_change_backing_file(const char *device, + const char *image_node_name, + const char *backing_file, + Error **errp) +{ + BlockDriverState *bs = NULL; + BlockDriverState *image_bs = NULL; + Error *local_err = NULL; + bool ro; + int open_flags; + int ret; + + /* find the top layer BDS of the chain */ + bs = bdrv_find(device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + + image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (!image_bs) { + error_setg(errp, "image file not found"); + return; + } + + if (bdrv_find_base(image_bs) == image_bs) { + error_setg(errp, "not allowing backing file change on an image " + "without a backing file"); + return; + } + + /* even though we are not necessarily operating on bs, we need it to + * determine if block ops are currently prohibited on the chain */ + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_CHANGE, errp)) { + return; + } + + /* final sanity check */ + if (!bdrv_chain_contains(bs, image_bs)) { + error_setg(errp, "'%s' and image file are not in the same chain", + device); + return; + } + + /* if not r/w, reopen to make r/w */ + open_flags = image_bs->open_flags; + ro = bdrv_is_read_only(image_bs); + + if (ro) { + bdrv_reopen(image_bs, open_flags | BDRV_O_RDWR, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + + ret = bdrv_change_backing_file(image_bs, backing_file, + image_bs->drv ? image_bs->drv->format_name : ""); + + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not change backing file to '%s'", + backing_file); + /* don't exit here, so we can try to restore open flags if + * appropriate */ + } + + if (ro) { + bdrv_reopen(image_bs, open_flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); /* will preserve prior errp */ + } + } +} + void qmp_blockdev_add(BlockdevOptions *options, Error **errp) { QmpOutputVisitor *ov = qmp_output_visitor_new(); diff --git a/qapi/block-core.json b/qapi/block-core.json index 6a697f1458..aa1252778c 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -679,6 +679,32 @@ { 'command': 'blockdev-snapshot-sync', 'data': 'BlockdevSnapshot' } +## +# @change-backing-file +# +# Change the backing file in the image file metadata. This does not +# cause QEMU to reopen the image file to reparse the backing filename +# (it may, however, perform a reopen to change permissions from +# r/o -> r/w -> r/o, if needed). The new backing file string is written +# into the image file metadata, and the QEMU internal strings are +# updated. +# +# @image-node-name: The name of the block driver state node of the +# image to modify. +# +# @device: The name of the device that owns image-node-name. +# +# @backing-file: The string to write as the backing file. This +# string is not validated, so care should be taken +# when specifying the string or the image chain may +# not be able to be reopened again. +# +# Since: 2.1 +## +{ 'command': 'change-backing-file', + 'data': { 'device': 'str', 'image-node-name': 'str', + 'backing-file': 'str' } } + ## # @block-commit # diff --git a/qmp-commands.hx b/qmp-commands.hx index 8c5fdb5ca1..8534948e3f 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1350,6 +1350,45 @@ Example: "format": "qcow2" } } <- { "return": {} } +EQMP + + { + .name = "change-backing-file", + .args_type = "device:s,image-node-name:s,backing-file:s", + .mhandler.cmd_new = qmp_marshal_input_change_backing_file, + }, + +SQMP +change-backing-file +------------------- +Since: 2.1 + +Change the backing file in the image file metadata. This does not cause +QEMU to reopen the image file to reparse the backing filename (it may, +however, perform a reopen to change permissions from r/o -> r/w -> r/o, +if needed). The new backing file string is written into the image file +metadata, and the QEMU internal strings are updated. + +Arguments: + +- "image-node-name": The name of the block driver state node of the + image to modify. The "device" is argument is used to + verify "image-node-name" is in the chain described by + "device". + (json-string, optional) + +- "device": The name of the device. + (json-string) + +- "backing-file": The string to write as the backing file. This string is + not validated, so care should be taken when specifying + the string or the image chain may not be able to be + reopened again. + (json-string) + +Returns: Nothing on success + If "device" does not exist or cannot be determined, DeviceNotFound + EQMP { From 5a6684d2b957f9ec75d7ed7b14332293abec1d6c Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 25 Jun 2014 15:40:09 -0400 Subject: [PATCH 21/23] block: add helper function to determine if a BDS is in a chain This is a small helper function, to determine if 'base' is in the chain of BlockDriverState 'top'. It returns true if it is in the chain, and false otherwise. If either argument is NULL, it will also return false. Reviewed-by: Benoit Canet Reviewed-by: Eric Blake Signed-off-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block.c | 11 +++++++++++ include/block/block.h | 1 + 2 files changed, 12 insertions(+) diff --git a/block.c b/block.c index c111c297f7..f45e63c88b 100644 --- a/block.c +++ b/block.c @@ -3774,6 +3774,17 @@ BlockDriverState *bdrv_lookup_bs(const char *device, return NULL; } +/* If 'base' is in the same chain as 'top', return true. Otherwise, + * return false. If either argument is NULL, return false. */ +bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) +{ + while (top && top != base) { + top = top->backing_hd; + } + + return top != NULL; +} + BlockDriverState *bdrv_next(BlockDriverState *bs) { if (!bs) { diff --git a/include/block/block.h b/include/block/block.h index 7e92f549fb..29c9e50ddb 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -403,6 +403,7 @@ BlockDeviceInfoList *bdrv_named_nodes_list(void); BlockDriverState *bdrv_lookup_bs(const char *device, const char *node_name, Error **errp); +bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); BlockDriverState *bdrv_next(BlockDriverState *bs); void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque); From 54e269009099cdc9483be115f1e12d56ad459c5e Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 25 Jun 2014 15:40:10 -0400 Subject: [PATCH 22/23] block: extend block-commit to accept a string for the backing file On some image chains, QEMU may not always be able to resolve the filenames properly, when updating the backing file of an image after a block commit. For instance, certain relative pathnames may fail, or drives may have been specified originally by file descriptor (e.g. /dev/fd/???), or a relative protocol pathname may have been used. In these instances, QEMU may lack the information to be able to make the correct choice, but the user or management layer most likely does have that knowledge. With this extension to the block-commit api, the user is able to change the backing file of the overlay image as part of the block-commit operation. This allows the change to be 'safe', in the sense that if the attempt to write the overlay image metadata fails, then the block-commit operation returns failure, without disrupting the guest. If the commit top is the active layer, then specifying the backing file string will be treated as an error (there is no overlay image to modify in that case). If a backing file string is not specified in the command, the backing file string to use is determined in the same manner as it was previously. Reviewed-by: Eric Blake Signed-off-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block.c | 8 ++++++-- block/commit.c | 9 ++++++--- blockdev.c | 8 +++++++- include/block/block.h | 3 ++- include/block/block_int.h | 3 ++- qapi/block-core.json | 20 ++++++++++++++++++-- qmp-commands.hx | 19 ++++++++++++++++++- 7 files changed, 59 insertions(+), 11 deletions(-) diff --git a/block.c b/block.c index f45e63c88b..f80e2b2b58 100644 --- a/block.c +++ b/block.c @@ -2555,12 +2555,15 @@ typedef struct BlkIntermediateStates { * * base <- active * + * If backing_file_str is non-NULL, it will be used when modifying top's + * overlay image metadata. + * * Error conditions: * if active == top, that is considered an error * */ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, - BlockDriverState *base) + BlockDriverState *base, const char *backing_file_str) { BlockDriverState *intermediate; BlockDriverState *base_bs = NULL; @@ -2612,7 +2615,8 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, } /* success - we can delete the intermediate states, and link top->base */ - ret = bdrv_change_backing_file(new_top_bs, base_bs->filename, + backing_file_str = backing_file_str ? backing_file_str : base_bs->filename; + ret = bdrv_change_backing_file(new_top_bs, backing_file_str, base_bs->drv ? base_bs->drv->format_name : ""); if (ret) { goto exit; diff --git a/block/commit.c b/block/commit.c index 5c09f4444e..91517d3512 100644 --- a/block/commit.c +++ b/block/commit.c @@ -37,6 +37,7 @@ typedef struct CommitBlockJob { BlockdevOnError on_error; int base_flags; int orig_overlay_flags; + char *backing_file_str; } CommitBlockJob; static int coroutine_fn commit_populate(BlockDriverState *bs, @@ -141,7 +142,7 @@ wait: if (!block_job_is_cancelled(&s->common) && sector_num == end) { /* success */ - ret = bdrv_drop_intermediate(active, top, base); + ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str); } exit_free_buf: @@ -158,7 +159,7 @@ exit_restore_reopen: if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); } - + g_free(s->backing_file_str); block_job_completed(&s->common, ret); } @@ -182,7 +183,7 @@ static const BlockJobDriver commit_job_driver = { void commit_start(BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, BlockDriverCompletionFunc *cb, - void *opaque, Error **errp) + void *opaque, const char *backing_file_str, Error **errp) { CommitBlockJob *s; BlockReopenQueue *reopen_queue = NULL; @@ -244,6 +245,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, s->base_flags = orig_base_flags; s->orig_overlay_flags = orig_overlay_flags; + s->backing_file_str = g_strdup(backing_file_str); + s->on_error = on_error; s->common.co = qemu_coroutine_create(commit_run); diff --git a/blockdev.c b/blockdev.c index 57373d3661..48315e86c0 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1915,6 +1915,7 @@ void qmp_block_stream(const char *device, bool has_base, void qmp_block_commit(const char *device, bool has_base, const char *base, bool has_top, const char *top, + bool has_backing_file, const char *backing_file, bool has_speed, int64_t speed, Error **errp) { @@ -1980,11 +1981,16 @@ void qmp_block_commit(const char *device, } if (top_bs == bs) { + if (has_backing_file) { + error_setg(errp, "'backing-file' specified," + " but 'top' is the active layer"); + return; + } commit_active_start(bs, base_bs, speed, on_error, block_job_cb, bs, &local_err); } else { commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs, - &local_err); + has_backing_file ? backing_file : NULL, &local_err); } if (local_err != NULL) { error_propagate(errp, local_err); diff --git a/include/block/block.h b/include/block/block.h index 29c9e50ddb..baecc26dfc 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -285,7 +285,8 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); void bdrv_register(BlockDriver *bdrv); int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, - BlockDriverState *base); + BlockDriverState *base, + const char *backing_file_str); BlockDriverState *bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bs); BlockDriverState *bdrv_find_base(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index eaf6e313d5..8f8e65e763 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -463,13 +463,14 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base, * @on_error: The action to take upon error. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. + * @backing_file_str: String to use as the backing file in @top's overlay * @errp: Error object. * */ void commit_start(BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, BlockDriverCompletionFunc *cb, - void *opaque, Error **errp); + void *opaque, const char *backing_file_str, Error **errp); /** * commit_active_start: * @bs: Active block device to be committed. diff --git a/qapi/block-core.json b/qapi/block-core.json index aa1252778c..5b4d75fa81 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -720,6 +720,23 @@ # which contains the topmost data to be committed down. If # not specified, this is the active layer. # +# @backing-file: #optional The backing file string to write into the overlay +# image of 'top'. If 'top' is the active layer, +# specifying a backing file string is an error. This +# filename is not validated. +# +# If a pathname string is such that it cannot be +# resolved by QEMU, that means that subsequent QMP or +# HMP commands must use node-names for the image in +# question, as filename lookup methods will fail. +# +# If not specified, QEMU will automatically determine +# the backing file string to use, or error out if +# there is no obvious choice. Care should be taken +# when specifying the string, to specify a valid +# filename or protocol. +# (Since 2.1) +# # If top == base, that is an error. # If top == active, the job will not be completed by itself, # user needs to complete the job with the block-job-complete @@ -732,7 +749,6 @@ # size of the smaller top, you can safely truncate it # yourself once the commit operation successfully completes. # -# # @speed: #optional the maximum speed, in bytes per second # # Returns: Nothing on success @@ -747,7 +763,7 @@ ## { 'command': 'block-commit', 'data': { 'device': 'str', '*base': 'str', '*top': 'str', - '*speed': 'int' } } + '*backing-file': 'str', '*speed': 'int' } } ## # @drive-backup diff --git a/qmp-commands.hx b/qmp-commands.hx index 8534948e3f..a3f932cb46 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -985,7 +985,7 @@ EQMP { .name = "block-commit", - .args_type = "device:B,base:s?,top:s?,speed:o?", + .args_type = "device:B,base:s?,top:s?,backing-file:s?,speed:o?", .mhandler.cmd_new = qmp_marshal_input_block_commit, }, @@ -1006,6 +1006,23 @@ Arguments: which contains the topmost data to be committed down. If not specified, this is the active layer. (json-string, optional) +- backing-file: The backing file string to write into the overlay + image of 'top'. If 'top' is the active layer, + specifying a backing file string is an error. This + filename is not validated. + + If a pathname string is such that it cannot be + resolved by QEMU, that means that subsequent QMP or + HMP commands must use node-names for the image in + question, as filename lookup methods will fail. + + If not specified, QEMU will automatically determine + the backing file string to use, or error out if + there is no obvious choice. Care should be taken + when specifying the string, to specify a valid + filename or protocol. + (json-string, optional) (Since 2.1) + If top == base, that is an error. If top == active, the job will not be completed by itself, user needs to complete the job with the block-job-complete From 13d8cc515dfcf5574077f964332d34890c0101d0 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 25 Jun 2014 15:40:11 -0400 Subject: [PATCH 23/23] block: add backing-file option to block-stream On some image chains, QEMU may not always be able to resolve the filenames properly, when updating the backing file of an image after a block job. For instance, certain relative pathnames may fail, or drives may have been specified originally by file descriptor (e.g. /dev/fd/???), or a relative protocol pathname may have been used. In these instances, QEMU may lack the information to be able to make the correct choice, but the user or management layer most likely does have that knowledge. With this extension to the block-stream api, the user is able to change the backing file of the active layer as part of the block-stream operation. This allows the change to be 'safe', in the sense that if the attempt to write the active image metadata fails, then the block-stream operation returns failure, without disrupting the guest. If a backing file string is not specified in the command, the backing file string to use is determined in the same manner as it was previously. Reviewed-by: Eric Blake Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/stream.c | 11 +++++------ blockdev.c | 23 +++++++++++++++++++---- hmp.c | 2 +- qapi/block-core.json | 19 +++++++++++++++++-- qmp-commands.hx | 2 +- 5 files changed, 43 insertions(+), 14 deletions(-) diff --git a/block/stream.c b/block/stream.c index 043340994d..34de8ba0d9 100644 --- a/block/stream.c +++ b/block/stream.c @@ -32,7 +32,7 @@ typedef struct StreamBlockJob { RateLimit limit; BlockDriverState *base; BlockdevOnError on_error; - char backing_file_id[1024]; + char *backing_file_str; } StreamBlockJob; static int coroutine_fn stream_populate(BlockDriverState *bs, @@ -186,7 +186,7 @@ wait: if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) { const char *base_id = NULL, *base_fmt = NULL; if (base) { - base_id = s->backing_file_id; + base_id = s->backing_file_str; if (base->drv) { base_fmt = base->drv->format_name; } @@ -196,6 +196,7 @@ wait: } qemu_vfree(buf); + g_free(s->backing_file_str); block_job_completed(&s->common, ret); } @@ -217,7 +218,7 @@ static const BlockJobDriver stream_job_driver = { }; void stream_start(BlockDriverState *bs, BlockDriverState *base, - const char *base_id, int64_t speed, + const char *backing_file_str, int64_t speed, BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) @@ -237,9 +238,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base, } s->base = base; - if (base_id) { - pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id); - } + s->backing_file_str = g_strdup(backing_file_str); s->on_error = on_error; s->common.co = qemu_coroutine_create(stream_run); diff --git a/blockdev.c b/blockdev.c index 48315e86c0..48bd9a37bc 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1871,14 +1871,17 @@ static void block_job_cb(void *opaque, int ret) bdrv_put_ref_bh_schedule(bs); } -void qmp_block_stream(const char *device, bool has_base, - const char *base, bool has_speed, int64_t speed, +void qmp_block_stream(const char *device, + bool has_base, const char *base, + bool has_backing_file, const char *backing_file, + bool has_speed, int64_t speed, bool has_on_error, BlockdevOnError on_error, Error **errp) { BlockDriverState *bs; BlockDriverState *base_bs = NULL; Error *local_err = NULL; + const char *base_name = NULL; if (!has_on_error) { on_error = BLOCKDEV_ON_ERROR_REPORT; @@ -1894,15 +1897,27 @@ void qmp_block_stream(const char *device, bool has_base, return; } - if (base) { + if (has_base) { base_bs = bdrv_find_backing_image(bs, base); if (base_bs == NULL) { error_set(errp, QERR_BASE_NOT_FOUND, base); return; } + base_name = base; } - stream_start(bs, base_bs, base, has_speed ? speed : 0, + /* if we are streaming the entire chain, the result will have no backing + * file, and specifying one is therefore an error */ + if (base_bs == NULL && has_backing_file) { + error_setg(errp, "backing file specified, but streaming the " + "entire chain"); + return; + } + + /* backing_file string overrides base bs filename */ + base_name = has_backing_file ? backing_file : base_name; + + stream_start(bs, base_bs, base_name, has_speed ? speed : 0, on_error, block_job_cb, bs, &local_err); if (local_err) { error_propagate(errp, local_err); diff --git a/hmp.c b/hmp.c index 6429e6b447..4d1838e9ea 100644 --- a/hmp.c +++ b/hmp.c @@ -1176,7 +1176,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict) const char *base = qdict_get_try_str(qdict, "base"); int64_t speed = qdict_get_try_int(qdict, "speed", 0); - qmp_block_stream(device, base != NULL, base, + qmp_block_stream(device, base != NULL, base, false, NULL, qdict_haskey(qdict, "speed"), speed, true, BLOCKDEV_ON_ERROR_REPORT, &error); diff --git a/qapi/block-core.json b/qapi/block-core.json index 5b4d75fa81..e378653a77 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -922,6 +922,21 @@ # # @base: #optional the common backing file name # +# @backing-file: #optional The backing file string to write into the active +# layer. This filename is not validated. +# +# If a pathname string is such that it cannot be +# resolved by QEMU, that means that subsequent QMP or +# HMP commands must use node-names for the image in +# question, as filename lookup methods will fail. +# +# If not specified, QEMU will automatically determine +# the backing file string to use, or error out if there +# is no obvious choice. Care should be taken when +# specifying the string, to specify a valid filename or +# protocol. +# (Since 2.1) +# # @speed: #optional the maximum speed, in bytes per second # # @on-error: #optional the action to take on an error (default report). @@ -934,8 +949,8 @@ # Since: 1.1 ## { 'command': 'block-stream', - 'data': { 'device': 'str', '*base': 'str', '*speed': 'int', - '*on-error': 'BlockdevOnError' } } + 'data': { 'device': 'str', '*base': 'str', '*backing-file': 'str', + '*speed': 'int', '*on-error': 'BlockdevOnError' } } ## # @block-job-set-speed: diff --git a/qmp-commands.hx b/qmp-commands.hx index a3f932cb46..4be4765f27 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -979,7 +979,7 @@ EQMP { .name = "block-stream", - .args_type = "device:B,base:s?,speed:o?,on-error:s?", + .args_type = "device:B,base:s?,speed:o?,backing-file:s?,on-error:s?", .mhandler.cmd_new = qmp_marshal_input_block_stream, },