From 6275989647efb708f126eb4f880e593792301ed4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 4 Jan 2024 21:09:35 +0000 Subject: [PATCH 01/60] virtio: split into vhost-user-base and vhost-user-device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's keep a cleaner split between the base class and the derived vhost-user-device which we can use for generic vhost-user stubs. This includes an update to introduce the vq_size property so the number of entries in a virtqueue can be defined. Signed-off-by: Alex Bennée Message-Id: <20240104210945.1223134-2-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 6 + hw/virtio/meson.build | 1 + hw/virtio/vhost-user-base.c | 346 ++++++++++++++++++ hw/virtio/vhost-user-device-pci.c | 13 +- hw/virtio/vhost-user-device.c | 338 +---------------- ...{vhost-user-device.h => vhost-user-base.h} | 21 +- 6 files changed, 383 insertions(+), 342 deletions(-) create mode 100644 hw/virtio/vhost-user-base.c rename include/hw/virtio/{vhost-user-device.h => vhost-user-base.h} (71%) diff --git a/MAINTAINERS b/MAINTAINERS index 2f9741b898..2426368c4d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2319,6 +2319,12 @@ F: include/sysemu/rng*.h F: backends/rng*.c F: tests/qtest/virtio-rng-test.c +vhost-user-stubs +M: Alex Bennée +S: Maintained +F: hw/virtio/vhost-user-base.c +F: hw/virtio/vhost-user-device* + vhost-user-rng M: Mathieu Poirier S: Supported diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 47baf00366..3ccddbe950 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -17,6 +17,7 @@ if have_vhost if have_vhost_user # fixme - this really should be generic specific_virtio_ss.add(files('vhost-user.c')) + system_virtio_ss.add(files('vhost-user-base.c')) system_virtio_ss.add(files('vhost-user-device.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) endif 
diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c new file mode 100644 index 0000000000..620fa5cb4a --- /dev/null +++ b/hw/virtio/vhost-user-base.c @@ -0,0 +1,346 @@ +/* + * Base vhost-user-base implementation. This can be used to derive a + * more fully specified vhost-user backend either generically (see + * vhost-user-device) or via a specific stub for a device which + * encapsulates some fixed parameters. + * + * Copyright (c) 2023 Linaro Ltd + * Author: Alex Bennée + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-base.h" +#include "qemu/error-report.h" + +static void vub_start(VirtIODevice *vdev) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + int ret, i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&vub->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + vub->vhost_dev.acked_features = vdev->guest_features; + + ret = vhost_dev_start(&vub->vhost_dev, vdev, true); + if (ret < 0) { + error_report("Error starting vhost-user-base: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. 
+ */ + for (i = 0; i < vub->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&vub->vhost_dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); +} + +static void vub_stop(VirtIODevice *vdev) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&vub->vhost_dev, vdev, true); + + ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); +} + +static void vub_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + bool should_start = virtio_device_should_start(vdev, status); + + if (vhost_dev_is_started(&vub->vhost_dev) == should_start) { + return; + } + + if (should_start) { + vub_start(vdev); + } else { + vub_stop(vdev); + } +} + +/* + * For an implementation where everything is delegated to the backend + * we don't do anything other than return the full feature set offered + * by the daemon (modulo the reserved feature bit). + */ +static uint64_t vub_get_features(VirtIODevice *vdev, + uint64_t requested_features, Error **errp) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + /* This should be set when the vhost connection initialises */ + g_assert(vub->vhost_dev.features); + return vub->vhost_dev.features & ~(1ULL << VHOST_USER_F_PROTOCOL_FEATURES); +} + +/* + * To handle VirtIO config we need to know the size of the config + * space. We don't cache the config but re-fetch it from the daemon + * every time in case something has changed. 
+ */ +static void vub_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + Error *local_err = NULL; + + /* + * There will have been a warning during vhost_dev_init, but let's + * assert here as nothing will go right now. + */ + g_assert(vub->config_size && vub->vhost_user.supports_config == true); + + if (vhost_dev_get_config(&vub->vhost_dev, config, + vub->config_size, &local_err)) { + error_report_err(local_err); + } +} + +/* + * When the daemon signals an update to the config we just need to + * signal the guest as we re-read the config on demand above. + */ +static int vub_config_notifier(struct vhost_dev *dev) +{ + virtio_notify_config(dev->vdev); + return 0; +} + +const VhostDevConfigOps vub_config_ops = { + .vhost_dev_config_notifier = vub_config_notifier, +}; + +static void vub_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. + */ +} + +static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserBase *vub) +{ + vhost_user_cleanup(&vub->vhost_user); + + for (int i = 0; i < vub->num_vqs; i++) { + VirtQueue *vq = g_ptr_array_index(vub->vqs, i); + virtio_delete_queue(vq); + } + + virtio_cleanup(vdev); +} + +static int vub_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + struct vhost_dev *vhost_dev = &vub->vhost_dev; + + if (vub->connected) { + return 0; + } + vub->connected = true; + + /* + * If we support VHOST_USER_GET_CONFIG we must enable the notifier + * so we can ping the guest when it updates. 
+ */ + if (vub->vhost_user.supports_config) { + vhost_dev_set_config_notifier(vhost_dev, &vub_config_ops); + } + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vub_start(vdev); + } + + return 0; +} + +static void vub_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + + if (!vub->connected) { + return; + } + vub->connected = false; + + if (vhost_dev_is_started(&vub->vhost_dev)) { + vub_stop(vdev); + } +} + +static void vub_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + + switch (event) { + case CHR_EVENT_OPENED: + if (vub_connect(dev) < 0) { + qemu_chr_fe_disconnect(&vub->chardev); + return; + } + break; + case CHR_EVENT_CLOSED: + vub_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static void vub_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(dev); + int ret; + + if (!vub->chardev.chr) { + error_setg(errp, "vhost-user-base: missing chardev"); + return; + } + + if (!vub->virtio_id) { + error_setg(errp, "vhost-user-base: need to define device id"); + return; + } + + if (!vub->num_vqs) { + vub->num_vqs = 1; /* reasonable default? */ + } + + if (!vub->vq_size) { + vub->vq_size = 64; + } + + /* + * We can't handle config requests unless we know the size of the + * config region, specialisations of the vhost-user-base will be + * able to set this. + */ + if (vub->config_size) { + vub->vhost_user.supports_config = true; + } + + if (!vhost_user_init(&vub->vhost_user, &vub->chardev, errp)) { + return; + } + + virtio_init(vdev, vub->virtio_id, vub->config_size); + + /* + * Disable guest notifiers, by default all notifications will be via the + * asynchronous vhost-user socket. 
+ */ + vdev->use_guest_notifier_mask = false; + + /* Allocate queues */ + vub->vqs = g_ptr_array_sized_new(vub->num_vqs); + for (int i = 0; i < vub->num_vqs; i++) { + g_ptr_array_add(vub->vqs, + virtio_add_queue(vdev, vub->vq_size, + vub_handle_output)); + } + + vub->vhost_dev.nvqs = vub->num_vqs; + vub->vhost_dev.vqs = g_new0(struct vhost_virtqueue, vub->vhost_dev.nvqs); + + /* connect to backend; NOTE(review): the failure path below cleans up but does not return - confirm */ + ret = vhost_dev_init(&vub->vhost_dev, &vub->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + + if (ret < 0) { + do_vhost_user_cleanup(vdev, vub); + } + + qemu_chr_fe_set_handlers(&vub->chardev, NULL, NULL, vub_event, NULL, + dev, NULL, true); +} + +static void vub_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(dev); + struct vhost_virtqueue *vhost_vqs = vub->vhost_dev.vqs; + + /* This will stop vhost backend if appropriate. */ + vub_set_status(vdev, 0); + vhost_dev_cleanup(&vub->vhost_dev); + g_free(vhost_vqs); + do_vhost_user_cleanup(vdev, vub); +} + +static void vub_class_init(ObjectClass *klass, void *data) +{ + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + vdc->realize = vub_device_realize; + vdc->unrealize = vub_device_unrealize; + vdc->get_features = vub_get_features; + vdc->get_config = vub_get_config; + vdc->set_status = vub_set_status; +} + +static const TypeInfo vub_types[] = { + { + .name = TYPE_VHOST_USER_BASE, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserBase), + .class_init = vub_class_init, + .class_size = sizeof(VHostUserBaseClass), + .abstract = true + } +}; + +DEFINE_TYPES(vub_types) diff --git a/hw/virtio/vhost-user-device-pci.c b/hw/virtio/vhost-user-device-pci.c index 41f9b7905b..efaf55d3dd 100644 --- a/hw/virtio/vhost-user-device-pci.c +++ b/hw/virtio/vhost-user-device-pci.c @@ -9,21 +9,18 @@ #include "qemu/osdep.h" #include "hw/qdev-properties.h" -#include "hw/virtio/vhost-user-device.h" +#include "hw/virtio/vhost-user-base.h" #include 
"hw/virtio/virtio-pci.h" struct VHostUserDevicePCI { VirtIOPCIProxy parent_obj; + VHostUserBase vub; }; -typedef struct VHostUserDevicePCI VHostUserDevicePCI; - #define TYPE_VHOST_USER_DEVICE_PCI "vhost-user-device-pci-base" -DECLARE_INSTANCE_CHECKER(VHostUserDevicePCI, - VHOST_USER_DEVICE_PCI, - TYPE_VHOST_USER_DEVICE_PCI) +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserDevicePCI, VHOST_USER_DEVICE_PCI) static void vhost_user_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { @@ -39,6 +36,10 @@ static void vhost_user_device_pci_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + + /* Reason: stop users confusing themselves */ + dc->user_creatable = false; + k->realize = vhost_user_device_pci_realize; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c index 2b028cae08..67aa934710 100644 --- a/hw/virtio/vhost-user-device.c +++ b/hw/virtio/vhost-user-device.c @@ -1,7 +1,10 @@ /* - * Generic vhost-user stub. This can be used to connect to any - * vhost-user backend. All configuration details must be handled by - * the vhost-user daemon itself + * Generic vhost-user-device implementation for any vhost-user-backend + * + * This is a concrete implementation of vhost-user-base which can be + * configured via properties. It is useful for development and + * prototyping. It expects configuration details (if any) to be + * handled by the vhost-user daemon itself. 
* * Copyright (c) 2023 Linaro Ltd * Author: Alex Bennée @@ -13,329 +16,9 @@ #include "qapi/error.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/vhost-user-device.h" +#include "hw/virtio/vhost-user-base.h" #include "qemu/error-report.h" -static void vub_start(VirtIODevice *vdev) -{ - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VHostUserBase *vub = VHOST_USER_BASE(vdev); - int ret, i; - - if (!k->set_guest_notifiers) { - error_report("binding does not support guest notifiers"); - return; - } - - ret = vhost_dev_enable_notifiers(&vub->vhost_dev, vdev); - if (ret < 0) { - error_report("Error enabling host notifiers: %d", -ret); - return; - } - - ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, true); - if (ret < 0) { - error_report("Error binding guest notifier: %d", -ret); - goto err_host_notifiers; - } - - vub->vhost_dev.acked_features = vdev->guest_features; - - ret = vhost_dev_start(&vub->vhost_dev, vdev, true); - if (ret < 0) { - error_report("Error starting vhost-user-device: %d", -ret); - goto err_guest_notifiers; - } - - /* - * guest_notifier_mask/pending not used yet, so just unmask - * everything here. virtio-pci will do the right thing by - * enabling/disabling irqfd. 
- */ - for (i = 0; i < vub->vhost_dev.nvqs; i++) { - vhost_virtqueue_mask(&vub->vhost_dev, vdev, i, false); - } - - return; - -err_guest_notifiers: - k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); -err_host_notifiers: - vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); -} - -static void vub_stop(VirtIODevice *vdev) -{ - VHostUserBase *vub = VHOST_USER_BASE(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret; - - if (!k->set_guest_notifiers) { - return; - } - - vhost_dev_stop(&vub->vhost_dev, vdev, true); - - ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); - if (ret < 0) { - error_report("vhost guest notifier cleanup failed: %d", ret); - return; - } - - vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); -} - -static void vub_set_status(VirtIODevice *vdev, uint8_t status) -{ - VHostUserBase *vub = VHOST_USER_BASE(vdev); - bool should_start = virtio_device_should_start(vdev, status); - - if (vhost_dev_is_started(&vub->vhost_dev) == should_start) { - return; - } - - if (should_start) { - vub_start(vdev); - } else { - vub_stop(vdev); - } -} - -/* - * For an implementation where everything is delegated to the backend - * we don't do anything other than return the full feature set offered - * by the daemon (module the reserved feature bit). - */ -static uint64_t vub_get_features(VirtIODevice *vdev, - uint64_t requested_features, Error **errp) -{ - VHostUserBase *vub = VHOST_USER_BASE(vdev); - /* This should be set when the vhost connection initialises */ - g_assert(vub->vhost_dev.features); - return vub->vhost_dev.features & ~(1ULL << VHOST_USER_F_PROTOCOL_FEATURES); -} - -/* - * To handle VirtIO config we need to know the size of the config - * space. We don't cache the config but re-fetch it from the guest - * every time in case something has changed. 
- */ -static void vub_get_config(VirtIODevice *vdev, uint8_t *config) -{ - VHostUserBase *vub = VHOST_USER_BASE(vdev); - Error *local_err = NULL; - - /* - * There will have been a warning during vhost_dev_init, but lets - * assert here as nothing will go right now. - */ - g_assert(vub->config_size && vub->vhost_user.supports_config == true); - - if (vhost_dev_get_config(&vub->vhost_dev, config, - vub->config_size, &local_err)) { - error_report_err(local_err); - } -} - -/* - * When the daemon signals an update to the config we just need to - * signal the guest as we re-read the config on demand above. - */ -static int vub_config_notifier(struct vhost_dev *dev) -{ - virtio_notify_config(dev->vdev); - return 0; -} - -const VhostDevConfigOps vub_config_ops = { - .vhost_dev_config_notifier = vub_config_notifier, -}; - -static void vub_handle_output(VirtIODevice *vdev, VirtQueue *vq) -{ - /* - * Not normally called; it's the daemon that handles the queue; - * however virtio's cleanup path can call this. - */ -} - -static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserBase *vub) -{ - vhost_user_cleanup(&vub->vhost_user); - - for (int i = 0; i < vub->num_vqs; i++) { - VirtQueue *vq = g_ptr_array_index(vub->vqs, i); - virtio_delete_queue(vq); - } - - virtio_cleanup(vdev); -} - -static int vub_connect(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserBase *vub = VHOST_USER_BASE(vdev); - struct vhost_dev *vhost_dev = &vub->vhost_dev; - - if (vub->connected) { - return 0; - } - vub->connected = true; - - /* - * If we support VHOST_USER_GET_CONFIG we must enable the notifier - * so we can ping the guest when it updates. 
- */ - if (vub->vhost_user.supports_config) { - vhost_dev_set_config_notifier(vhost_dev, &vub_config_ops); - } - - /* restore vhost state */ - if (virtio_device_started(vdev, vdev->status)) { - vub_start(vdev); - } - - return 0; -} - -static void vub_disconnect(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserBase *vub = VHOST_USER_BASE(vdev); - - if (!vub->connected) { - return; - } - vub->connected = false; - - if (vhost_dev_is_started(&vub->vhost_dev)) { - vub_stop(vdev); - } -} - -static void vub_event(void *opaque, QEMUChrEvent event) -{ - DeviceState *dev = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserBase *vub = VHOST_USER_BASE(vdev); - - switch (event) { - case CHR_EVENT_OPENED: - if (vub_connect(dev) < 0) { - qemu_chr_fe_disconnect(&vub->chardev); - return; - } - break; - case CHR_EVENT_CLOSED: - vub_disconnect(dev); - break; - case CHR_EVENT_BREAK: - case CHR_EVENT_MUX_IN: - case CHR_EVENT_MUX_OUT: - /* Ignore */ - break; - } -} - -static void vub_device_realize(DeviceState *dev, Error **errp) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserBase *vub = VHOST_USER_BASE(dev); - int ret; - - if (!vub->chardev.chr) { - error_setg(errp, "vhost-user-device: missing chardev"); - return; - } - - if (!vub->virtio_id) { - error_setg(errp, "vhost-user-device: need to define device id"); - return; - } - - if (!vub->num_vqs) { - vub->num_vqs = 1; /* reasonable default? */ - } - - /* - * We can't handle config requests unless we know the size of the - * config region, specialisations of the vhost-user-device will be - * able to set this. - */ - if (vub->config_size) { - vub->vhost_user.supports_config = true; - } - - if (!vhost_user_init(&vub->vhost_user, &vub->chardev, errp)) { - return; - } - - virtio_init(vdev, vub->virtio_id, vub->config_size); - - /* - * Disable guest notifiers, by default all notifications will be via the - * asynchronous vhost-user socket. 
- */ - vdev->use_guest_notifier_mask = false; - - /* Allocate queues */ - vub->vqs = g_ptr_array_sized_new(vub->num_vqs); - for (int i = 0; i < vub->num_vqs; i++) { - g_ptr_array_add(vub->vqs, - virtio_add_queue(vdev, 4, vub_handle_output)); - } - - vub->vhost_dev.nvqs = vub->num_vqs; - vub->vhost_dev.vqs = g_new0(struct vhost_virtqueue, vub->vhost_dev.nvqs); - - /* connect to backend */ - ret = vhost_dev_init(&vub->vhost_dev, &vub->vhost_user, - VHOST_BACKEND_TYPE_USER, 0, errp); - - if (ret < 0) { - do_vhost_user_cleanup(vdev, vub); - } - - qemu_chr_fe_set_handlers(&vub->chardev, NULL, NULL, vub_event, NULL, - dev, NULL, true); -} - -static void vub_device_unrealize(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserBase *vub = VHOST_USER_BASE(dev); - struct vhost_virtqueue *vhost_vqs = vub->vhost_dev.vqs; - - /* This will stop vhost backend if appropriate. */ - vub_set_status(vdev, 0); - vhost_dev_cleanup(&vub->vhost_dev); - g_free(vhost_vqs); - do_vhost_user_cleanup(vdev, vub); -} - -static void vub_class_init(ObjectClass *klass, void *data) -{ - VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); - - vdc->realize = vub_device_realize; - vdc->unrealize = vub_device_unrealize; - vdc->get_features = vub_get_features; - vdc->get_config = vub_get_config; - vdc->set_status = vub_set_status; -} - -static const TypeInfo vub_info = { - .name = TYPE_VHOST_USER_BASE, - .parent = TYPE_VIRTIO_DEVICE, - .instance_size = sizeof(VHostUserBase), - .class_init = vub_class_init, - .class_size = sizeof(VHostUserBaseClass), - .abstract = true -}; - - /* * The following is a concrete implementation of the base class which * allows the user to define the key parameters via the command line. 
@@ -349,6 +32,7 @@ static const VMStateDescription vud_vmstate = { static Property vud_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), DEFINE_PROP_UINT16("virtio-id", VHostUserBase, virtio_id, 0), + DEFINE_PROP_UINT32("vq_size", VHostUserBase, vq_size, 64), DEFINE_PROP_UINT32("num_vqs", VHostUserBase, num_vqs, 1), DEFINE_PROP_UINT32("config_size", VHostUserBase, config_size, 0), DEFINE_PROP_END_OF_LIST(), @@ -358,6 +42,9 @@ static void vud_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + /* Reason: stop inexperienced users confusing themselves */ + dc->user_creatable = false; + device_class_set_props(dc, vud_properties); dc->vmsd = &vud_vmstate; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -366,14 +53,11 @@ static void vud_class_init(ObjectClass *klass, void *data) static const TypeInfo vud_info = { .name = TYPE_VHOST_USER_DEVICE, .parent = TYPE_VHOST_USER_BASE, - .instance_size = sizeof(VHostUserBase), .class_init = vud_class_init, - .class_size = sizeof(VHostUserBaseClass), }; static void vu_register_types(void) { - type_register_static(&vub_info); type_register_static(&vud_info); } diff --git a/include/hw/virtio/vhost-user-device.h b/include/hw/virtio/vhost-user-base.h similarity index 71% rename from include/hw/virtio/vhost-user-device.h rename to include/hw/virtio/vhost-user-base.h index 3ddf88a146..51d0968b89 100644 --- a/include/hw/virtio/vhost-user-device.h +++ b/include/hw/virtio/vhost-user-base.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef QEMU_VHOST_USER_DEVICE_H -#define QEMU_VHOST_USER_DEVICE_H +#ifndef QEMU_VHOST_USER_BASE_H +#define QEMU_VHOST_USER_BASE_H #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user.h" @@ -17,11 +17,13 @@ OBJECT_DECLARE_TYPE(VHostUserBase, VHostUserBaseClass, VHOST_USER_BASE) struct VHostUserBase { - VirtIODevice parent; + VirtIODevice parent_obj; + /* Properties */ CharBackend chardev; uint16_t virtio_id; uint32_t num_vqs; + 
uint32_t vq_size; /* can't exceed VIRTQUEUE_MAX_SIZE */ uint32_t config_size; /* State tracking */ VhostUserState vhost_user; @@ -31,16 +33,17 @@ struct VHostUserBase { bool connected; }; - /* needed so we can use the base realize after specialisation - tweaks */ +/* + * Needed so we can use the base realize after specialisation + * tweaks + */ struct VHostUserBaseClass { - /*< private >*/ VirtioDeviceClass parent_class; - /*< public >*/ + DeviceRealize parent_realize; }; -/* shared for the benefit of the derived pci class */ + #define TYPE_VHOST_USER_DEVICE "vhost-user-device" -#endif /* QEMU_VHOST_USER_DEVICE_H */ +#endif /* QEMU_VHOST_USER_BASE_H */ From 64a312a2737e3aab97f103b76f8e76a97770e670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 4 Jan 2024 21:09:36 +0000 Subject: [PATCH 02/60] hw/virtio: convert vhost-user-base to async shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are about to convert at least one stub which was using the async teardown, so let's use it for all the cases. Signed-off-by: Alex Bennée Message-Id: <20240104210945.1223134-3-alex.bennee@linaro.org> Reviewed-by: Michael S. 
Tsirkin --- hw/virtio/vhost-user-base.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c index 620fa5cb4a..78cfa9a5bb 100644 --- a/hw/virtio/vhost-user-base.c +++ b/hw/virtio/vhost-user-base.c @@ -201,6 +201,8 @@ static int vub_connect(DeviceState *dev) return 0; } +static void vub_event(void *opaque, QEMUChrEvent event); + static void vub_disconnect(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -211,9 +213,13 @@ static void vub_disconnect(DeviceState *dev) } vub->connected = false; - if (vhost_dev_is_started(&vub->vhost_dev)) { - vub_stop(vdev); - } + vub_stop(vdev); + vhost_dev_cleanup(&vub->vhost_dev); + + /* Re-instate the event handler for new connections */ + qemu_chr_fe_set_handlers(&vub->chardev, + NULL, NULL, vub_event, + NULL, dev, NULL, true); } static void vub_event(void *opaque, QEMUChrEvent event) @@ -230,7 +236,9 @@ static void vub_event(void *opaque, QEMUChrEvent event) } break; case CHR_EVENT_CLOSED: - vub_disconnect(dev); + /* defer close until later to avoid circular close */ + vhost_user_async_close(dev, &vub->chardev, &vub->vhost_dev, + vub_disconnect, vub_event); break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: From 233412bf7a2b0349aa3b80ab7217e741c8acef3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 4 Jan 2024 21:09:37 +0000 Subject: [PATCH 03/60] hw/virtio: derive vhost-user-rng from vhost-user-base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we can take advantage of our new base class and make vhost-user-rng a much simpler boilerplate wrapper. Also as this doesn't require any target specific hacks we only need to build the stubs once. Acked-by: Mark Cave-Ayland Signed-off-by: Alex Bennée Message-Id: <20240104210945.1223134-4-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/meson.build | 9 +- hw/virtio/vhost-user-rng.c | 294 +++-------------------------- include/hw/virtio/vhost-user-rng.h | 13 +- 3 files changed, 31 insertions(+), 285 deletions(-) diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 3ccddbe950..4f85e7a9ed 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -18,8 +18,15 @@ if have_vhost # fixme - this really should be generic specific_virtio_ss.add(files('vhost-user.c')) system_virtio_ss.add(files('vhost-user-base.c')) + + # MMIO Stubs system_virtio_ss.add(files('vhost-user-device.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) + + # PCI Stubs system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_RNG'], + if_true: files('vhost-user-rng-pci.c')) endif if have_vhost_vdpa system_virtio_ss.add(files('vhost-vdpa.c')) @@ -37,7 +44,6 @@ specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-us specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) @@ -49,7 +55,6 @@ virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vs virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c-pci.c')) 
virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input-pci.c')) -virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs-pci.c')) diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c index 24ac1a22c8..01879c863d 100644 --- a/hw/virtio/vhost-user-rng.c +++ b/hw/virtio/vhost-user-rng.c @@ -3,7 +3,7 @@ * * Copyright (c) 2021 Mathieu Poirier * - * Implementation seriously tailored on vhost-user-i2c.c + * Simple wrapper of the generic vhost-user-device. * * SPDX-License-Identifier: GPL-2.0-or-later */ @@ -13,297 +13,47 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/vhost-user-rng.h" -#include "qemu/error-report.h" #include "standard-headers/linux/virtio_ids.h" -static const int feature_bits[] = { - VIRTIO_F_RING_RESET, - VHOST_INVALID_FEATURE_BIT -}; - -static void vu_rng_start(VirtIODevice *vdev) -{ - VHostUserRNG *rng = VHOST_USER_RNG(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret; - int i; - - if (!k->set_guest_notifiers) { - error_report("binding does not support guest notifiers"); - return; - } - - ret = vhost_dev_enable_notifiers(&rng->vhost_dev, vdev); - if (ret < 0) { - error_report("Error enabling host notifiers: %d", -ret); - return; - } - - ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, true); - if (ret < 0) { - error_report("Error binding guest notifier: %d", -ret); - goto err_host_notifiers; - } - - rng->vhost_dev.acked_features = vdev->guest_features; - ret = vhost_dev_start(&rng->vhost_dev, vdev, true); - if (ret < 0) { - error_report("Error starting vhost-user-rng: %d", -ret); - goto 
err_guest_notifiers; - } - - /* - * guest_notifier_mask/pending not used yet, so just unmask - * everything here. virtio-pci will do the right thing by - * enabling/disabling irqfd. - */ - for (i = 0; i < rng->vhost_dev.nvqs; i++) { - vhost_virtqueue_mask(&rng->vhost_dev, vdev, i, false); - } - - return; - -err_guest_notifiers: - k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false); -err_host_notifiers: - vhost_dev_disable_notifiers(&rng->vhost_dev, vdev); -} - -static void vu_rng_stop(VirtIODevice *vdev) -{ - VHostUserRNG *rng = VHOST_USER_RNG(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret; - - if (!k->set_guest_notifiers) { - return; - } - - vhost_dev_stop(&rng->vhost_dev, vdev, true); - - ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false); - if (ret < 0) { - error_report("vhost guest notifier cleanup failed: %d", ret); - return; - } - - vhost_dev_disable_notifiers(&rng->vhost_dev, vdev); -} - -static void vu_rng_set_status(VirtIODevice *vdev, uint8_t status) -{ - VHostUserRNG *rng = VHOST_USER_RNG(vdev); - bool should_start = virtio_device_should_start(vdev, status); - - if (vhost_dev_is_started(&rng->vhost_dev) == should_start) { - return; - } - - if (should_start) { - vu_rng_start(vdev); - } else { - vu_rng_stop(vdev); - } -} - -static uint64_t vu_rng_get_features(VirtIODevice *vdev, - uint64_t requested_features, Error **errp) -{ - VHostUserRNG *rng = VHOST_USER_RNG(vdev); - - return vhost_get_features(&rng->vhost_dev, feature_bits, - requested_features); -} - -static void vu_rng_handle_output(VirtIODevice *vdev, VirtQueue *vq) -{ - /* - * Not normally called; it's the daemon that handles the queue; - * however virtio's cleanup path can call this. 
- */ -} - -static void vu_rng_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) -{ - VHostUserRNG *rng = VHOST_USER_RNG(vdev); - - /* - * We don't support interrupts, return early if index is set to - * VIRTIO_CONFIG_IRQ_IDX. - */ - if (idx == VIRTIO_CONFIG_IRQ_IDX) { - return; - } - - vhost_virtqueue_mask(&rng->vhost_dev, vdev, idx, mask); -} - -static bool vu_rng_guest_notifier_pending(VirtIODevice *vdev, int idx) -{ - VHostUserRNG *rng = VHOST_USER_RNG(vdev); - - /* - * We don't support interrupts, return early if index is set to - * VIRTIO_CONFIG_IRQ_IDX. - */ - if (idx == VIRTIO_CONFIG_IRQ_IDX) { - return false; - } - - return vhost_virtqueue_pending(&rng->vhost_dev, idx); -} - -static void vu_rng_connect(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserRNG *rng = VHOST_USER_RNG(vdev); - - if (rng->connected) { - return; - } - - rng->connected = true; - - /* restore vhost state */ - if (virtio_device_started(vdev, vdev->status)) { - vu_rng_start(vdev); - } -} - -static void vu_rng_disconnect(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserRNG *rng = VHOST_USER_RNG(vdev); - - if (!rng->connected) { - return; - } - - rng->connected = false; - - if (vhost_dev_is_started(&rng->vhost_dev)) { - vu_rng_stop(vdev); - } -} - -static void vu_rng_event(void *opaque, QEMUChrEvent event) -{ - DeviceState *dev = opaque; - - switch (event) { - case CHR_EVENT_OPENED: - vu_rng_connect(dev); - break; - case CHR_EVENT_CLOSED: - vu_rng_disconnect(dev); - break; - case CHR_EVENT_BREAK: - case CHR_EVENT_MUX_IN: - case CHR_EVENT_MUX_OUT: - /* Ignore */ - break; - } -} - -static void vu_rng_device_realize(DeviceState *dev, Error **errp) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserRNG *rng = VHOST_USER_RNG(dev); - int ret; - - if (!rng->chardev.chr) { - error_setg(errp, "missing chardev"); - return; - } - - if (!vhost_user_init(&rng->vhost_user, &rng->chardev, errp)) { - return; - } - - virtio_init(vdev, 
VIRTIO_ID_RNG, 0); - - rng->req_vq = virtio_add_queue(vdev, 4, vu_rng_handle_output); - if (!rng->req_vq) { - error_setg_errno(errp, -1, "virtio_add_queue() failed"); - goto virtio_add_queue_failed; - } - - rng->vhost_dev.nvqs = 1; - rng->vhost_dev.vqs = g_new0(struct vhost_virtqueue, rng->vhost_dev.nvqs); - ret = vhost_dev_init(&rng->vhost_dev, &rng->vhost_user, - VHOST_BACKEND_TYPE_USER, 0, errp); - if (ret < 0) { - error_setg_errno(errp, -ret, "vhost_dev_init() failed"); - goto vhost_dev_init_failed; - } - - qemu_chr_fe_set_handlers(&rng->chardev, NULL, NULL, vu_rng_event, NULL, - dev, NULL, true); - - return; - -vhost_dev_init_failed: - g_free(rng->vhost_dev.vqs); - virtio_delete_queue(rng->req_vq); -virtio_add_queue_failed: - virtio_cleanup(vdev); - vhost_user_cleanup(&rng->vhost_user); -} - -static void vu_rng_device_unrealize(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserRNG *rng = VHOST_USER_RNG(dev); - struct vhost_virtqueue *vhost_vqs = rng->vhost_dev.vqs; - - vu_rng_set_status(vdev, 0); - - vhost_dev_cleanup(&rng->vhost_dev); - g_free(vhost_vqs); - virtio_delete_queue(rng->req_vq); - virtio_cleanup(vdev); - vhost_user_cleanup(&rng->vhost_user); -} - -static struct vhost_dev *vu_rng_get_vhost(VirtIODevice *vdev) -{ - VHostUserRNG *rng = VHOST_USER_RNG(vdev); - return &rng->vhost_dev; -} - static const VMStateDescription vu_rng_vmstate = { .name = "vhost-user-rng", .unmigratable = 1, }; -static Property vu_rng_properties[] = { - DEFINE_PROP_CHR("chardev", VHostUserRNG, chardev), +static Property vrng_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), DEFINE_PROP_END_OF_LIST(), }; +static void vu_rng_base_realize(DeviceState *dev, Error **errp) +{ + VHostUserBase *vub = VHOST_USER_BASE(dev); + VHostUserBaseClass *vubs = VHOST_USER_BASE_GET_CLASS(dev); + + /* Fixed for RNG */ + vub->virtio_id = VIRTIO_ID_RNG; + vub->num_vqs = 1; + vub->vq_size = 4; + + vubs->parent_realize(dev, errp); +} + static void 
vu_rng_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + VHostUserBaseClass *vubc = VHOST_USER_BASE_CLASS(klass); - device_class_set_props(dc, vu_rng_properties); dc->vmsd = &vu_rng_vmstate; - set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + device_class_set_props(dc, vrng_properties); + device_class_set_parent_realize(dc, vu_rng_base_realize, + &vubc->parent_realize); - vdc->realize = vu_rng_device_realize; - vdc->unrealize = vu_rng_device_unrealize; - vdc->get_features = vu_rng_get_features; - vdc->set_status = vu_rng_set_status; - vdc->guest_notifier_mask = vu_rng_guest_notifier_mask; - vdc->guest_notifier_pending = vu_rng_guest_notifier_pending; - vdc->get_vhost = vu_rng_get_vhost; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); } static const TypeInfo vu_rng_info = { .name = TYPE_VHOST_USER_RNG, - .parent = TYPE_VIRTIO_DEVICE, + .parent = TYPE_VHOST_USER_BASE, .instance_size = sizeof(VHostUserRNG), .class_init = vu_rng_class_init, }; diff --git a/include/hw/virtio/vhost-user-rng.h b/include/hw/virtio/vhost-user-rng.h index ddd9f01eea..10868c7de4 100644 --- a/include/hw/virtio/vhost-user-rng.h +++ b/include/hw/virtio/vhost-user-rng.h @@ -12,22 +12,13 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user.h" -#include "chardev/char-fe.h" +#include "hw/virtio/vhost-user-base.h" #define TYPE_VHOST_USER_RNG "vhost-user-rng" OBJECT_DECLARE_SIMPLE_TYPE(VHostUserRNG, VHOST_USER_RNG) struct VHostUserRNG { - /*< private >*/ - VirtIODevice parent; - CharBackend chardev; - struct vhost_virtqueue *vhost_vq; - struct vhost_dev vhost_dev; - VhostUserState vhost_user; - VirtQueue *req_vq; - bool connected; - - /*< public >*/ + VHostUserBase parent_obj; }; #endif /* QEMU_VHOST_USER_RNG_H */ From 5ba587580db55bd310dc64d0eb89b4f7b19c6404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 4 Jan 2024 21:09:38 +0000 Subject: 
[PATCH 04/60] hw/virtio: derive vhost-user-gpio from vhost-user-base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the new base class supports config handling, we can take advantage of it and make vhost-user-gpio a much simpler boilerplate wrapper. Also, as this doesn't require any target-specific hacks, we only need to build the stubs once. Acked-by: Mark Cave-Ayland Acked-by: Viresh Kumar Signed-off-by: Alex Bennée Message-Id: <20240104210945.1223134-5-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/meson.build | 5 +- hw/virtio/vhost-user-gpio.c | 407 ++-------------------------- include/hw/virtio/vhost-user-gpio.h | 25 +- 3 files changed, 22 insertions(+), 415 deletions(-) diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 4f85e7a9ed..374a616580 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -21,10 +21,13 @@ if have_vhost # MMIO Stubs system_virtio_ss.add(files('vhost-user-device.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) # PCI Stubs system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], + if_true: files('vhost-user-gpio-pci.c')) system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_RNG'], if_true: files('vhost-user-rng-pci.c')) endif @@ -44,8 +47,6 @@ specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-us specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c'))
-specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SCMI'], if_true: files('vhost-user-scmi-pci.c')) diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c index a83437a5da..9f37c25415 100644 --- a/hw/virtio/vhost-user-gpio.c +++ b/hw/virtio/vhost-user-gpio.c @@ -11,388 +11,25 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/vhost-user-gpio.h" -#include "qemu/error-report.h" #include "standard-headers/linux/virtio_ids.h" -#include "trace.h" +#include "standard-headers/linux/virtio_gpio.h" -#define VHOST_NVQS 2 - -/* Features required from VirtIO */ -static const int feature_bits[] = { - VIRTIO_F_VERSION_1, - VIRTIO_F_NOTIFY_ON_EMPTY, - VIRTIO_RING_F_INDIRECT_DESC, - VIRTIO_RING_F_EVENT_IDX, - VIRTIO_GPIO_F_IRQ, - VIRTIO_F_RING_RESET, - VHOST_INVALID_FEATURE_BIT +static Property vgpio_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), + DEFINE_PROP_END_OF_LIST(), }; -static void vu_gpio_get_config(VirtIODevice *vdev, uint8_t *config) +static void vgpio_realize(DeviceState *dev, Error **errp) { - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + VHostUserBase *vub = VHOST_USER_BASE(dev); + VHostUserBaseClass *vubc = VHOST_USER_BASE_GET_CLASS(dev); - memcpy(config, &gpio->config, sizeof(gpio->config)); -} + /* Fixed for GPIO */ + vub->virtio_id = VIRTIO_ID_GPIO; + vub->num_vqs = 2; + vub->config_size = sizeof(struct virtio_gpio_config); -static int vu_gpio_config_notifier(struct vhost_dev *dev) -{ - VHostUserGPIO *gpio = VHOST_USER_GPIO(dev->vdev); - - memcpy(dev->vdev->config, &gpio->config, sizeof(gpio->config)); - virtio_notify_config(dev->vdev); - - return 0; -} - -const VhostDevConfigOps gpio_ops = { - .vhost_dev_config_notifier = vu_gpio_config_notifier, -}; - 
-static int vu_gpio_start(VirtIODevice *vdev) -{ - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - struct vhost_dev *vhost_dev = &gpio->vhost_dev; - int ret, i; - - if (!k->set_guest_notifiers) { - error_report("binding does not support guest notifiers"); - return -ENOSYS; - } - - ret = vhost_dev_enable_notifiers(vhost_dev, vdev); - if (ret < 0) { - error_report("Error enabling host notifiers: %d", ret); - return ret; - } - - ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, true); - if (ret < 0) { - error_report("Error binding guest notifier: %d", ret); - goto err_host_notifiers; - } - - /* - * Before we start up we need to ensure we have the final feature - * set needed for the vhost configuration. The backend may also - * apply backend_features when the feature set is sent. - */ - vhost_ack_features(&gpio->vhost_dev, feature_bits, vdev->guest_features); - - ret = vhost_dev_start(&gpio->vhost_dev, vdev, false); - if (ret < 0) { - error_report("Error starting vhost-user-gpio: %d", ret); - goto err_guest_notifiers; - } - gpio->started_vu = true; - - /* - * guest_notifier_mask/pending not used yet, so just unmask - * everything here. virtio-pci will do the right thing by - * enabling/disabling irqfd. - */ - for (i = 0; i < gpio->vhost_dev.nvqs; i++) { - vhost_virtqueue_mask(&gpio->vhost_dev, vdev, i, false); - } - - /* - * As we must have VHOST_USER_F_PROTOCOL_FEATURES (because - * VHOST_USER_GET_CONFIG requires it) we need to explicitly enable - * the vrings. 
- */ - g_assert(vhost_dev->vhost_ops && - vhost_dev->vhost_ops->vhost_set_vring_enable); - ret = vhost_dev->vhost_ops->vhost_set_vring_enable(vhost_dev, true); - if (ret == 0) { - return 0; - } - - error_report("Failed to start vrings for vhost-user-gpio: %d", ret); - -err_guest_notifiers: - k->set_guest_notifiers(qbus->parent, gpio->vhost_dev.nvqs, false); -err_host_notifiers: - vhost_dev_disable_notifiers(&gpio->vhost_dev, vdev); - - return ret; -} - -static void vu_gpio_stop(VirtIODevice *vdev) -{ - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - struct vhost_dev *vhost_dev = &gpio->vhost_dev; - int ret; - - if (!gpio->started_vu) { - return; - } - gpio->started_vu = false; - - if (!k->set_guest_notifiers) { - return; - } - - vhost_dev_stop(vhost_dev, vdev, false); - - ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false); - if (ret < 0) { - error_report("vhost guest notifier cleanup failed: %d", ret); - return; - } - - vhost_dev_disable_notifiers(vhost_dev, vdev); -} - -static void vu_gpio_set_status(VirtIODevice *vdev, uint8_t status) -{ - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - bool should_start = virtio_device_should_start(vdev, status); - - trace_virtio_gpio_set_status(status); - - if (!gpio->connected) { - return; - } - - if (vhost_dev_is_started(&gpio->vhost_dev) == should_start) { - return; - } - - if (should_start) { - if (vu_gpio_start(vdev)) { - qemu_chr_fe_disconnect(&gpio->chardev); - } - } else { - vu_gpio_stop(vdev); - } -} - -static uint64_t vu_gpio_get_features(VirtIODevice *vdev, uint64_t features, - Error **errp) -{ - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - - return vhost_get_features(&gpio->vhost_dev, feature_bits, features); -} - -static void vu_gpio_handle_output(VirtIODevice *vdev, VirtQueue *vq) -{ - /* - * Not normally called; it's the daemon that handles the queue; - * however virtio's cleanup path can 
call this. - */ -} - -static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) -{ - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - - /* - * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the macro of configure interrupt's IDX, If this driver does not - * support, the function will return - */ - - if (idx == VIRTIO_CONFIG_IRQ_IDX) { - return; - } - - vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask); -} - -static struct vhost_dev *vu_gpio_get_vhost(VirtIODevice *vdev) -{ - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - return &gpio->vhost_dev; -} - -static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserGPIO *gpio) -{ - virtio_delete_queue(gpio->command_vq); - virtio_delete_queue(gpio->interrupt_vq); - g_free(gpio->vhost_vqs); - virtio_cleanup(vdev); - vhost_user_cleanup(&gpio->vhost_user); -} - -static int vu_gpio_connect(DeviceState *dev, Error **errp) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - struct vhost_dev *vhost_dev = &gpio->vhost_dev; - int ret; - - if (gpio->connected) { - return 0; - } - - vhost_dev_set_config_notifier(vhost_dev, &gpio_ops); - gpio->vhost_user.supports_config = true; - - gpio->vhost_dev.nvqs = VHOST_NVQS; - gpio->vhost_dev.vqs = gpio->vhost_vqs; - - ret = vhost_dev_init(vhost_dev, &gpio->vhost_user, - VHOST_BACKEND_TYPE_USER, 0, errp); - if (ret < 0) { - return ret; - } - - gpio->connected = true; - - /* restore vhost state */ - if (virtio_device_started(vdev, vdev->status)) { - vu_gpio_start(vdev); - } - - return 0; -} - -static void vu_gpio_event(void *opaque, QEMUChrEvent event); - -static void vu_gpio_disconnect(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - - if (!gpio->connected) { - return; - } - gpio->connected = false; - - vu_gpio_stop(vdev); - vhost_dev_cleanup(&gpio->vhost_dev); - - /* Re-instate the event handler for new connections */ - 
qemu_chr_fe_set_handlers(&gpio->chardev, - NULL, NULL, vu_gpio_event, - NULL, dev, NULL, true); -} - -static void vu_gpio_event(void *opaque, QEMUChrEvent event) -{ - DeviceState *dev = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - Error *local_err = NULL; - - switch (event) { - case CHR_EVENT_OPENED: - if (vu_gpio_connect(dev, &local_err) < 0) { - qemu_chr_fe_disconnect(&gpio->chardev); - return; - } - break; - case CHR_EVENT_CLOSED: - /* defer close until later to avoid circular close */ - vhost_user_async_close(dev, &gpio->chardev, &gpio->vhost_dev, - vu_gpio_disconnect, vu_gpio_event); - break; - case CHR_EVENT_BREAK: - case CHR_EVENT_MUX_IN: - case CHR_EVENT_MUX_OUT: - /* Ignore */ - break; - } -} - -static int vu_gpio_realize_connect(VHostUserGPIO *gpio, Error **errp) -{ - VirtIODevice *vdev = &gpio->parent_obj; - DeviceState *dev = &vdev->parent_obj; - struct vhost_dev *vhost_dev = &gpio->vhost_dev; - int ret; - - ret = qemu_chr_fe_wait_connected(&gpio->chardev, errp); - if (ret < 0) { - return ret; - } - - /* - * vu_gpio_connect() may have already connected (via the event - * callback) in which case it will just report success. 
- */ - ret = vu_gpio_connect(dev, errp); - if (ret < 0) { - qemu_chr_fe_disconnect(&gpio->chardev); - return ret; - } - g_assert(gpio->connected); - - ret = vhost_dev_get_config(vhost_dev, (uint8_t *)&gpio->config, - sizeof(gpio->config), errp); - - if (ret < 0) { - error_report("vhost-user-gpio: get config failed"); - - qemu_chr_fe_disconnect(&gpio->chardev); - vhost_dev_cleanup(vhost_dev); - return ret; - } - - return 0; -} - -static void vu_gpio_device_realize(DeviceState *dev, Error **errp) -{ - ERRP_GUARD(); - - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserGPIO *gpio = VHOST_USER_GPIO(dev); - int retries, ret; - - if (!gpio->chardev.chr) { - error_setg(errp, "vhost-user-gpio: chardev is mandatory"); - return; - } - - if (!vhost_user_init(&gpio->vhost_user, &gpio->chardev, errp)) { - return; - } - - virtio_init(vdev, VIRTIO_ID_GPIO, sizeof(gpio->config)); - - gpio->command_vq = virtio_add_queue(vdev, 256, vu_gpio_handle_output); - gpio->interrupt_vq = virtio_add_queue(vdev, 256, vu_gpio_handle_output); - gpio->vhost_vqs = g_new0(struct vhost_virtqueue, VHOST_NVQS); - - gpio->connected = false; - - qemu_chr_fe_set_handlers(&gpio->chardev, NULL, NULL, vu_gpio_event, NULL, - dev, NULL, true); - - retries = VU_REALIZE_CONN_RETRIES; - g_assert(!*errp); - do { - if (*errp) { - error_prepend(errp, "Reconnecting after error: "); - error_report_err(*errp); - *errp = NULL; - } - ret = vu_gpio_realize_connect(gpio, errp); - } while (ret < 0 && retries--); - - if (ret < 0) { - do_vhost_user_cleanup(vdev, gpio); - } - - return; -} - -static void vu_gpio_device_unrealize(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserGPIO *gpio = VHOST_USER_GPIO(dev); - - vu_gpio_set_status(vdev, 0); - qemu_chr_fe_set_handlers(&gpio->chardev, NULL, NULL, NULL, NULL, NULL, NULL, - false); - vhost_dev_cleanup(&gpio->vhost_dev); - do_vhost_user_cleanup(vdev, gpio); + vubc->parent_realize(dev, errp); } static const VMStateDescription vu_gpio_vmstate = { @@ 
-400,31 +37,21 @@ static const VMStateDescription vu_gpio_vmstate = { .unmigratable = 1, }; -static Property vu_gpio_properties[] = { - DEFINE_PROP_CHR("chardev", VHostUserGPIO, chardev), - DEFINE_PROP_END_OF_LIST(), -}; - static void vu_gpio_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + VHostUserBaseClass *vubc = VHOST_USER_BASE_CLASS(klass); - device_class_set_props(dc, vu_gpio_properties); dc->vmsd = &vu_gpio_vmstate; + device_class_set_props(dc, vgpio_properties); + device_class_set_parent_realize(dc, vgpio_realize, + &vubc->parent_realize); set_bit(DEVICE_CATEGORY_INPUT, dc->categories); - vdc->realize = vu_gpio_device_realize; - vdc->unrealize = vu_gpio_device_unrealize; - vdc->get_features = vu_gpio_get_features; - vdc->get_config = vu_gpio_get_config; - vdc->set_status = vu_gpio_set_status; - vdc->guest_notifier_mask = vu_gpio_guest_notifier_mask; - vdc->get_vhost = vu_gpio_get_vhost; } static const TypeInfo vu_gpio_info = { .name = TYPE_VHOST_USER_GPIO, - .parent = TYPE_VIRTIO_DEVICE, + .parent = TYPE_VHOST_USER_BASE, .instance_size = sizeof(VHostUserGPIO), .class_init = vu_gpio_class_init, }; diff --git a/include/hw/virtio/vhost-user-gpio.h b/include/hw/virtio/vhost-user-gpio.h index a9d3f9b049..5814a8400a 100644 --- a/include/hw/virtio/vhost-user-gpio.h +++ b/include/hw/virtio/vhost-user-gpio.h @@ -12,34 +12,13 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user.h" -#include "standard-headers/linux/virtio_gpio.h" -#include "chardev/char-fe.h" +#include "hw/virtio/vhost-user-base.h" #define TYPE_VHOST_USER_GPIO "vhost-user-gpio-device" OBJECT_DECLARE_SIMPLE_TYPE(VHostUserGPIO, VHOST_USER_GPIO); struct VHostUserGPIO { - /*< private >*/ - VirtIODevice parent_obj; - CharBackend chardev; - struct virtio_gpio_config config; - struct vhost_virtqueue *vhost_vqs; - struct vhost_dev vhost_dev; - VhostUserState vhost_user; - 
VirtQueue *command_vq; - VirtQueue *interrupt_vq; - /** - * There are at least two steps of initialization of the - * vhost-user device. The first is a "connect" step and - * second is a "start" step. Make a separation between - * those initialization phases by using two fields. - * - * @connected: see vu_gpio_connect()/vu_gpio_disconnect() - * @started_vu: see vu_gpio_start()/vu_gpio_stop() - */ - bool connected; - bool started_vu; - /*< public >*/ + VHostUserBase parent_obj; }; #endif /* _QEMU_VHOST_USER_GPIO_H */ From a50616b50d0107c3bbdf9fae5ac1f3659dfc70f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 4 Jan 2024 21:09:39 +0000 Subject: [PATCH 05/60] hw/virtio: derive vhost-user-i2c from vhost-user-base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we can take advantage of the new base class and make vhost-user-i2c a much simpler boilerplate wrapper. Also as this doesn't require any target specific hacks we only need to build the stubs once. Acked-by: Mark Cave-Ayland Acked-by: Viresh Kumar Signed-off-by: Alex Bennée Message-Id: <20240104210945.1223134-6-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/meson.build | 5 +- hw/virtio/vhost-user-i2c.c | 272 ++--------------------------- include/hw/virtio/vhost-user-i2c.h | 14 +- 3 files changed, 23 insertions(+), 268 deletions(-) diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 374a616580..685d586e69 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -22,12 +22,15 @@ if have_vhost # MMIO Stubs system_virtio_ss.add(files('vhost-user-device.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) # PCI Stubs system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], + if_true: files('vhost-user-i2c-pci.c')) system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_RNG'], if_true: files('vhost-user-rng-pci.c')) endif @@ -46,7 +49,6 @@ specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SCMI'], if_true: files('vhost-user-scmi-pci.c')) @@ -54,7 +56,6 @@ virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) virtio_pci_ss.add(when: 
'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk-pci.c')) -virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c')) diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c index 4eef3f0633..a464f5e039 100644 --- a/hw/virtio/vhost-user-i2c.c +++ b/hw/virtio/vhost-user-i2c.c @@ -14,253 +14,22 @@ #include "qemu/error-report.h" #include "standard-headers/linux/virtio_ids.h" -static const int feature_bits[] = { - VIRTIO_I2C_F_ZERO_LENGTH_REQUEST, - VIRTIO_F_RING_RESET, - VHOST_INVALID_FEATURE_BIT +static Property vi2c_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), + DEFINE_PROP_END_OF_LIST(), }; -static void vu_i2c_start(VirtIODevice *vdev) +static void vi2c_realize(DeviceState *dev, Error **errp) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - int ret, i; + VHostUserBase *vub = VHOST_USER_BASE(dev); + VHostUserBaseClass *vubc = VHOST_USER_BASE_GET_CLASS(dev); - if (!k->set_guest_notifiers) { - error_report("binding does not support guest notifiers"); - return; - } + /* Fixed for I2C */ + vub->virtio_id = VIRTIO_ID_I2C_ADAPTER; + vub->num_vqs = 1; + vub->vq_size = 4; - ret = vhost_dev_enable_notifiers(&i2c->vhost_dev, vdev); - if (ret < 0) { - error_report("Error enabling host notifiers: %d", -ret); - return; - } - - ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, true); - if (ret < 0) { - error_report("Error binding guest notifier: %d", -ret); - goto err_host_notifiers; - } - - i2c->vhost_dev.acked_features = 
vdev->guest_features; - - ret = vhost_dev_start(&i2c->vhost_dev, vdev, true); - if (ret < 0) { - error_report("Error starting vhost-user-i2c: %d", -ret); - goto err_guest_notifiers; - } - - /* - * guest_notifier_mask/pending not used yet, so just unmask - * everything here. virtio-pci will do the right thing by - * enabling/disabling irqfd. - */ - for (i = 0; i < i2c->vhost_dev.nvqs; i++) { - vhost_virtqueue_mask(&i2c->vhost_dev, vdev, i, false); - } - - return; - -err_guest_notifiers: - k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false); -err_host_notifiers: - vhost_dev_disable_notifiers(&i2c->vhost_dev, vdev); -} - -static void vu_i2c_stop(VirtIODevice *vdev) -{ - VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret; - - if (!k->set_guest_notifiers) { - return; - } - - vhost_dev_stop(&i2c->vhost_dev, vdev, true); - - ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false); - if (ret < 0) { - error_report("vhost guest notifier cleanup failed: %d", ret); - return; - } - - vhost_dev_disable_notifiers(&i2c->vhost_dev, vdev); -} - -static void vu_i2c_set_status(VirtIODevice *vdev, uint8_t status) -{ - VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - bool should_start = virtio_device_should_start(vdev, status); - - if (vhost_dev_is_started(&i2c->vhost_dev) == should_start) { - return; - } - - if (should_start) { - vu_i2c_start(vdev); - } else { - vu_i2c_stop(vdev); - } -} - -static uint64_t vu_i2c_get_features(VirtIODevice *vdev, - uint64_t requested_features, Error **errp) -{ - VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - - virtio_add_feature(&requested_features, VIRTIO_I2C_F_ZERO_LENGTH_REQUEST); - return vhost_get_features(&i2c->vhost_dev, feature_bits, requested_features); -} - -static void vu_i2c_handle_output(VirtIODevice *vdev, VirtQueue *vq) -{ - /* - * Not normally called; it's the daemon that handles the queue; - * however 
virtio's cleanup path can call this. - */ -} - -static void vu_i2c_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) -{ - VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - - /* - * We don't support interrupts, return early if index is set to - * VIRTIO_CONFIG_IRQ_IDX. - */ - if (idx == VIRTIO_CONFIG_IRQ_IDX) { - return; - } - - vhost_virtqueue_mask(&i2c->vhost_dev, vdev, idx, mask); -} - -static bool vu_i2c_guest_notifier_pending(VirtIODevice *vdev, int idx) -{ - VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - - /* - * We don't support interrupts, return early if index is set to - * VIRTIO_CONFIG_IRQ_IDX. - */ - if (idx == VIRTIO_CONFIG_IRQ_IDX) { - return false; - } - - return vhost_virtqueue_pending(&i2c->vhost_dev, idx); -} - -static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserI2C *i2c) -{ - vhost_user_cleanup(&i2c->vhost_user); - virtio_delete_queue(i2c->vq); - virtio_cleanup(vdev); -} - -static int vu_i2c_connect(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - - if (i2c->connected) { - return 0; - } - i2c->connected = true; - - /* restore vhost state */ - if (virtio_device_started(vdev, vdev->status)) { - vu_i2c_start(vdev); - } - - return 0; -} - -static void vu_i2c_disconnect(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - - if (!i2c->connected) { - return; - } - i2c->connected = false; - - if (vhost_dev_is_started(&i2c->vhost_dev)) { - vu_i2c_stop(vdev); - } -} - -static void vu_i2c_event(void *opaque, QEMUChrEvent event) -{ - DeviceState *dev = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - - switch (event) { - case CHR_EVENT_OPENED: - if (vu_i2c_connect(dev) < 0) { - qemu_chr_fe_disconnect(&i2c->chardev); - return; - } - break; - case CHR_EVENT_CLOSED: - vu_i2c_disconnect(dev); - break; - case CHR_EVENT_BREAK: - case CHR_EVENT_MUX_IN: - case CHR_EVENT_MUX_OUT: - /* Ignore 
*/ - break; - } -} - -static void vu_i2c_device_realize(DeviceState *dev, Error **errp) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserI2C *i2c = VHOST_USER_I2C(dev); - int ret; - - if (!i2c->chardev.chr) { - error_setg(errp, "vhost-user-i2c: missing chardev"); - return; - } - - if (!vhost_user_init(&i2c->vhost_user, &i2c->chardev, errp)) { - return; - } - - virtio_init(vdev, VIRTIO_ID_I2C_ADAPTER, 0); - - i2c->vhost_dev.nvqs = 1; - i2c->vq = virtio_add_queue(vdev, 4, vu_i2c_handle_output); - i2c->vhost_dev.vqs = g_new0(struct vhost_virtqueue, i2c->vhost_dev.nvqs); - - ret = vhost_dev_init(&i2c->vhost_dev, &i2c->vhost_user, - VHOST_BACKEND_TYPE_USER, 0, errp); - if (ret < 0) { - g_free(i2c->vhost_dev.vqs); - do_vhost_user_cleanup(vdev, i2c); - } - - qemu_chr_fe_set_handlers(&i2c->chardev, NULL, NULL, vu_i2c_event, NULL, - dev, NULL, true); -} - -static void vu_i2c_device_unrealize(DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserI2C *i2c = VHOST_USER_I2C(dev); - struct vhost_virtqueue *vhost_vqs = i2c->vhost_dev.vqs; - - /* This will stop vhost backend if appropriate. 
*/ - vu_i2c_set_status(vdev, 0); - vhost_dev_cleanup(&i2c->vhost_dev); - g_free(vhost_vqs); - do_vhost_user_cleanup(vdev, i2c); + vubc->parent_realize(dev, errp); } static const VMStateDescription vu_i2c_vmstate = { @@ -268,30 +37,21 @@ static const VMStateDescription vu_i2c_vmstate = { .unmigratable = 1, }; -static Property vu_i2c_properties[] = { - DEFINE_PROP_CHR("chardev", VHostUserI2C, chardev), - DEFINE_PROP_END_OF_LIST(), -}; - static void vu_i2c_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + VHostUserBaseClass *vubc = VHOST_USER_BASE_CLASS(klass); - device_class_set_props(dc, vu_i2c_properties); dc->vmsd = &vu_i2c_vmstate; + device_class_set_props(dc, vi2c_properties); + device_class_set_parent_realize(dc, vi2c_realize, + &vubc->parent_realize); set_bit(DEVICE_CATEGORY_INPUT, dc->categories); - vdc->realize = vu_i2c_device_realize; - vdc->unrealize = vu_i2c_device_unrealize; - vdc->get_features = vu_i2c_get_features; - vdc->set_status = vu_i2c_set_status; - vdc->guest_notifier_mask = vu_i2c_guest_notifier_mask; - vdc->guest_notifier_pending = vu_i2c_guest_notifier_pending; } static const TypeInfo vu_i2c_info = { .name = TYPE_VHOST_USER_I2C, - .parent = TYPE_VIRTIO_DEVICE, + .parent = TYPE_VHOST_USER_BASE, .instance_size = sizeof(VHostUserI2C), .class_init = vu_i2c_class_init, }; diff --git a/include/hw/virtio/vhost-user-i2c.h b/include/hw/virtio/vhost-user-i2c.h index 0f7acd40e3..a9b5612ad0 100644 --- a/include/hw/virtio/vhost-user-i2c.h +++ b/include/hw/virtio/vhost-user-i2c.h @@ -9,23 +9,17 @@ #ifndef QEMU_VHOST_USER_I2C_H #define QEMU_VHOST_USER_I2C_H +#include "hw/virtio/virtio.h" #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user.h" +#include "hw/virtio/vhost-user-base.h" #define TYPE_VHOST_USER_I2C "vhost-user-i2c-device" + OBJECT_DECLARE_SIMPLE_TYPE(VHostUserI2C, VHOST_USER_I2C) struct VHostUserI2C { - VirtIODevice parent; - CharBackend chardev; - 
struct vhost_virtqueue *vhost_vq; - struct vhost_dev vhost_dev; - VhostUserState vhost_user; - VirtQueue *vq; - bool connected; + VHostUserBase parent_obj; }; -/* Virtio Feature bits */ -#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0 - #endif /* QEMU_VHOST_USER_I2C_H */ From 4ae0fc18a1dc91bfe3a494292faf3c4c1b2cc16c Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Thu, 4 Jan 2024 21:09:40 +0000 Subject: [PATCH 06/60] hw/virtio: add vhost-user-snd and vhost-user-snd-pci devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested with rust-vmm vhost-user-sound daemon: RUST_LOG=trace cargo run --bin vhost-user-sound -- --socket /tmp/snd.sock --backend null Invocation: qemu-system-x86_64 \ -qmp unix:./qmp-sock,server,wait=off \ -m 4096 \ -numa node,memdev=mem \ -object memory-backend-file,id=mem,size=4G,mem-path=/dev/shm,share=on \ -D qemu.log \ -d guest_errors,trace:\*snd\*,trace:\*sound\*,trace:\*vhost\* \ -chardev socket,id=vsnd,path=/tmp/snd.sock \ -device vhost-user-snd-pci,chardev=vsnd,id=snd \ /path/to/disk [AJB: imported from https://github.com/epilys/qemu-virtio-snd/commit/54ae1cdd15fef2d88e9e387a175f099a38c636f4.patch] Signed-off-by: Alex Bennée Signed-off-by: Manos Pitsidianakis Message-Id: <20240104210945.1223134-7-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- MAINTAINERS | 7 +++ hw/virtio/Kconfig | 5 ++ hw/virtio/meson.build | 3 ++ hw/virtio/vhost-user-snd-pci.c | 75 ++++++++++++++++++++++++++++++ hw/virtio/vhost-user-snd.c | 67 ++++++++++++++++++++++++++ include/hw/virtio/vhost-user-snd.h | 24 ++++++++++ 6 files changed, 181 insertions(+) create mode 100644 hw/virtio/vhost-user-snd-pci.c create mode 100644 hw/virtio/vhost-user-snd.c create mode 100644 include/hw/virtio/vhost-user-snd.h diff --git a/MAINTAINERS b/MAINTAINERS index 2426368c4d..aff5342cb4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2342,6 +2342,13 @@ F: hw/virtio/vhost-user-gpio* F: include/hw/virtio/vhost-user-gpio.h F: tests/qtest/libqos/virtio-gpio.* +vhost-user-snd +M: Alex Bennée +R: Manos Pitsidianakis +S: Maintained +F: hw/virtio/vhost-user-snd* +F: include/hw/virtio/vhost-user-snd.h + vhost-user-scmi R: mzamazal@redhat.com S: Supported diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index 92c9cf6c96..aa63ff7fd4 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -101,6 +101,11 @@ config VHOST_VDPA_DEV default y depends on VIRTIO && VHOST_VDPA && LINUX +config VHOST_USER_SND + bool + default y + depends on VIRTIO && VHOST_USER + config VHOST_USER_SCMI bool default y diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 685d586e69..af8abae020 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -24,6 +24,7 @@ if have_vhost system_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_SND', if_true: files('vhost-user-snd.c')) # PCI Stubs system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) @@ -33,6 +34,8 @@ if have_vhost if_true: files('vhost-user-i2c-pci.c')) system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 
'CONFIG_VHOST_USER_RNG'], if_true: files('vhost-user-rng-pci.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SND'], + if_true: files('vhost-user-snd-pci.c')) endif if have_vhost_vdpa system_virtio_ss.add(files('vhost-vdpa.c')) diff --git a/hw/virtio/vhost-user-snd-pci.c b/hw/virtio/vhost-user-snd-pci.c new file mode 100644 index 0000000000..d61cfdae63 --- /dev/null +++ b/hw/virtio/vhost-user-snd-pci.c @@ -0,0 +1,75 @@ +/* + * Vhost-user Sound virtio device PCI glue + * + * Copyright (c) 2023 Manos Pitsidianakis + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-snd.h" +#include "hw/virtio/virtio-pci.h" + +struct VHostUserSoundPCI { + VirtIOPCIProxy parent_obj; + VHostUserSound vdev; +}; + +typedef struct VHostUserSoundPCI VHostUserSoundPCI; + +#define TYPE_VHOST_USER_SND_PCI "vhost-user-snd-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserSoundPCI, VHOST_USER_SND_PCI, + TYPE_VHOST_USER_SND_PCI) + +static Property vhost_user_snd_pci_properties[] = { + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_snd_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserSoundPCI *dev = VHOST_USER_SND_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + vpci_dev->nvectors = 1; + + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_snd_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_snd_pci_realize; + set_bit(DEVICE_CATEGORY_SOUND, dc->categories); + device_class_set_props(dc, vhost_user_snd_pci_properties); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_MULTIMEDIA_AUDIO; +} + +static void 
vhost_user_snd_pci_instance_init(Object *obj) +{ + VHostUserSoundPCI *dev = VHOST_USER_SND_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_SND); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_snd_pci_info = { + .base_name = TYPE_VHOST_USER_SND_PCI, + .non_transitional_name = "vhost-user-snd-pci", + .instance_size = sizeof(VHostUserSoundPCI), + .instance_init = vhost_user_snd_pci_instance_init, + .class_init = vhost_user_snd_pci_class_init, +}; + +static void vhost_user_snd_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_snd_pci_info); +} + +type_init(vhost_user_snd_pci_register); diff --git a/hw/virtio/vhost-user-snd.c b/hw/virtio/vhost-user-snd.c new file mode 100644 index 0000000000..9a217543f8 --- /dev/null +++ b/hw/virtio/vhost-user-snd.c @@ -0,0 +1,67 @@ +/* + * Vhost-user snd virtio device + * + * Copyright (c) 2023 Manos Pitsidianakis + * + * Simple wrapper of the generic vhost-user-device. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-snd.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_snd.h" + +static const VMStateDescription vu_snd_vmstate = { + .name = "vhost-user-snd", + .unmigratable = 1, +}; + +static Property vsnd_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vu_snd_base_realize(DeviceState *dev, Error **errp) +{ + VHostUserBase *vub = VHOST_USER_BASE(dev); + VHostUserBaseClass *vubs = VHOST_USER_BASE_GET_CLASS(dev); + + vub->virtio_id = VIRTIO_ID_SOUND; + vub->num_vqs = 4; + vub->config_size = sizeof(struct virtio_snd_config); + vub->vq_size = 64; + + vubs->parent_realize(dev, errp); +} + +static void vu_snd_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + 
VHostUserBaseClass *vubc = VHOST_USER_BASE_CLASS(klass); + + dc->vmsd = &vu_snd_vmstate; + device_class_set_props(dc, vsnd_properties); + device_class_set_parent_realize(dc, vu_snd_base_realize, + &vubc->parent_realize); + + set_bit(DEVICE_CATEGORY_SOUND, dc->categories); +} + +static const TypeInfo vu_snd_info = { + .name = TYPE_VHOST_USER_SND, + .parent = TYPE_VHOST_USER_BASE, + .instance_size = sizeof(VHostUserSound), + .class_init = vu_snd_class_init, +}; + +static void vu_snd_register_types(void) +{ + type_register_static(&vu_snd_info); +} + +type_init(vu_snd_register_types) diff --git a/include/hw/virtio/vhost-user-snd.h b/include/hw/virtio/vhost-user-snd.h new file mode 100644 index 0000000000..f9260116a7 --- /dev/null +++ b/include/hw/virtio/vhost-user-snd.h @@ -0,0 +1,24 @@ +/* + * Vhost-user Sound virtio device + * + * Copyright (c) 2021 Mathieu Poirier + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef QEMU_VHOST_USER_SND_H +#define QEMU_VHOST_USER_SND_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "hw/virtio/vhost-user-base.h" + +#define TYPE_VHOST_USER_SND "vhost-user-snd" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSound, VHOST_USER_SND) + +struct VHostUserSound { + VHostUserBase parent_obj; +}; + +#endif /* QEMU_VHOST_USER_SND_H */ From e7fe912276c0b5859404327b6ab9ce332a2a887b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 4 Jan 2024 21:09:41 +0000 Subject: [PATCH 07/60] docs/system: add a basic enumeration of vhost-user devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it clear the vhost-user-device is intended for expert use only. Signed-off-by: Alex Bennée Message-Id: <20240104210945.1223134-8-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- docs/system/devices/vhost-user-rng.rst | 2 + docs/system/devices/vhost-user.rst | 70 +++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/docs/system/devices/vhost-user-rng.rst b/docs/system/devices/vhost-user-rng.rst index a145d4105c..ead1405326 100644 --- a/docs/system/devices/vhost-user-rng.rst +++ b/docs/system/devices/vhost-user-rng.rst @@ -1,3 +1,5 @@ +.. _vhost_user_rng: + QEMU vhost-user-rng - RNG emulation =================================== diff --git a/docs/system/devices/vhost-user.rst b/docs/system/devices/vhost-user.rst index a80e95a48a..c6afc4836f 100644 --- a/docs/system/devices/vhost-user.rst +++ b/docs/system/devices/vhost-user.rst @@ -8,13 +8,81 @@ outside of QEMU itself. To do this there are a number of things required. vhost-user device -=================== +================= These are simple stub devices that ensure the VirtIO device is visible to the guest. The code is mostly boilerplate although each device has a ``chardev`` option which specifies the ID of the ``--chardev`` device that connects via a socket to the vhost-user *daemon*. +Each device will have a virtio-mmio and virtio-pci variant. See your +platform details for what sort of virtio bus to use. + +.. 
list-table:: vhost-user devices + :widths: 20 20 60 + :header-rows: 1 + + * - Device + - Type + - Notes + * - vhost-user-blk + - Block storage + - See contrib/vhost-user-blk + * - vhost-user-fs + - File based storage driver + - See https://gitlab.com/virtio-fs/virtiofsd + * - vhost-user-gpio + - Proxy gpio pins to host + - See https://github.com/rust-vmm/vhost-device + * - vhost-user-gpu + - GPU driver + - See contrib/vhost-user-gpu + * - vhost-user-i2c + - Proxy i2c devices to host + - See https://github.com/rust-vmm/vhost-device + * - vhost-user-input + - Generic input driver + - See contrib/vhost-user-input + * - vhost-user-rng + - Entropy driver + - :ref:`vhost_user_rng` + * - vhost-user-scmi + - System Control and Management Interface + - See https://github.com/rust-vmm/vhost-device + * - vhost-user-snd + - Audio device + - See https://github.com/rust-vmm/vhost-device/staging + * - vhost-user-scsi + - SCSI based storage + - See contrib/vhost-user-scsi + * - vhost-user-vsock + - Socket based communication + - See https://github.com/rust-vmm/vhost-device + +The referenced *daemons* are not exhaustive; any conforming backend +implementing the device and using the vhost-user protocol should work. + +vhost-user-device +^^^^^^^^^^^^^^^^^ + +The vhost-user-device is a generic development device intended for +expert use while developing new backends. The user needs to specify +all the required parameters including: + + - Device ``virtio-id`` + - The ``num_vqs`` it needs and their ``vq_size`` + - The ``config_size`` if needed + +.. note:: + To prevent user confusion you cannot currently instantiate + vhost-user-device without first patching out:: + + /* Reason: stop inexperienced users confusing themselves */ + dc->user_creatable = false; + + in ``vhost-user-device.c`` and ``vhost-user-device-pci.c`` files and + rebuilding. 
+ vhost-user daemon ================= From a26105dd56a11d5aec618fc5429ae7932d3221c5 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 4 Jan 2024 21:09:42 +0000 Subject: [PATCH 08/60] hw/virtio: Support set_config() callback in vhost-user-base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Virtio input device invokes set_config() callback for retrieving the event configuration info, but the callback is not supported in vhost-user-base. This patch adds support set_config() callback in vhost-user-base. Signed-off-by: Leo Yan Reviewed-by: Marc-André Lureau Message-Id: <20231120043721.50555-2-leo.yan@linaro.org> Signed-off-by: Alex Bennée Message-Id: <20240104210945.1223134-9-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user-base.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c index 78cfa9a5bb..a83167191e 100644 --- a/hw/virtio/vhost-user-base.c +++ b/hw/virtio/vhost-user-base.c @@ -140,6 +140,22 @@ static void vub_get_config(VirtIODevice *vdev, uint8_t *config) } } +static void vub_set_config(VirtIODevice *vdev, const uint8_t *config_data) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + int ret; + + g_assert(vub->config_size && vub->vhost_user.supports_config == true); + + ret = vhost_dev_set_config(&vub->vhost_dev, config_data, + 0, vub->config_size, + VHOST_SET_CONFIG_TYPE_FRONTEND); + if (ret) { + error_report("vhost guest set device config space failed: %d", ret); + return; + } +} + /* * When the daemon signals an update to the config we just need to * signal the guest as we re-read the config on demand above. 
@@ -337,6 +353,7 @@ static void vub_class_init(ObjectClass *klass, void *data) vdc->unrealize = vub_device_unrealize; vdc->get_features = vub_get_features; vdc->get_config = vub_get_config; + vdc->set_config = vub_set_config; vdc->set_status = vub_set_status; } From 887d5775863b8804bacba6fe1a860ed3ea5cfdd9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 4 Jan 2024 21:09:43 +0000 Subject: [PATCH 09/60] docs/system: Add vhost-user-input documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds basic documentation for vhost-user-input. Signed-off-by: Leo Yan Message-Id: <20231120043721.50555-3-leo.yan@linaro.org> Signed-off-by: Alex Bennée Message-Id: <20240104210945.1223134-10-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 1 + docs/system/device-emulation.rst | 1 + docs/system/devices/vhost-user-input.rst | 45 ++++++++++++++++++++++++ docs/system/devices/vhost-user.rst | 4 ++- 4 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 docs/system/devices/vhost-user-input.rst diff --git a/MAINTAINERS b/MAINTAINERS index aff5342cb4..66c9e81c55 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2289,6 +2289,7 @@ L: virtio-fs@lists.linux.dev virtio-input M: Gerd Hoffmann S: Odd Fixes +F: docs/system/devices/vhost-user-input.rst F: hw/input/vhost-user-input.c F: hw/input/virtio-input*.c F: include/hw/virtio/virtio-input.h diff --git a/docs/system/device-emulation.rst b/docs/system/device-emulation.rst index d1f3277cb0..f19777411c 100644 --- a/docs/system/device-emulation.rst +++ b/docs/system/device-emulation.rst @@ -94,6 +94,7 @@ Emulated Devices devices/virtio-gpu.rst devices/virtio-pmem.rst devices/virtio-snd.rst + devices/vhost-user-input.rst devices/vhost-user-rng.rst devices/canokey.rst devices/usb-u2f.rst diff --git a/docs/system/devices/vhost-user-input.rst b/docs/system/devices/vhost-user-input.rst new file mode 100644 index 
0000000000..118eb78101 --- /dev/null +++ b/docs/system/devices/vhost-user-input.rst @@ -0,0 +1,45 @@ +.. _vhost_user_input: + +QEMU vhost-user-input - Input emulation +======================================= + +This document describes the setup and usage of the Virtio input device. +The Virtio input device is a paravirtualized device for input events. + +Description +----------- + +The vhost-user-input device implementation was designed to work with a daemon +that polls input devices and passes input events to the guest. + +QEMU provides a backend implementation in contrib/vhost-user-input. + +Linux kernel support +-------------------- + +Virtio input requires a guest Linux kernel built with the +``CONFIG_VIRTIO_INPUT`` option. + +Examples +-------- + +The backend daemon should be started first: + +:: + + host# vhost-user-input --socket-path=/tmp/input.sock \ + --evdev-path=/dev/input/event17 + +The QEMU invocation needs to create a chardev socket to communicate with the +backend daemon and access the VirtIO queues with the guest over the +:ref:`shared memory <shared_memory_object>`. + +:: + + host# qemu-system \ + -chardev socket,path=/tmp/input.sock,id=mouse0 \ + -device vhost-user-input-pci,chardev=mouse0 \ + -m 4096 \ + -object memory-backend-file,id=mem,size=4G,mem-path=/dev/shm,share=on \ + -numa node,memdev=mem \ + ... diff --git a/docs/system/devices/vhost-user.rst b/docs/system/devices/vhost-user.rst index c6afc4836f..9b2da106ce 100644 --- a/docs/system/devices/vhost-user.rst +++ b/docs/system/devices/vhost-user.rst @@ -42,7 +42,7 @@ platform details for what sort of virtio bus to use. - See https://github.com/rust-vmm/vhost-device * - vhost-user-input - Generic input driver - - See contrib/vhost-user-input + - :ref:`vhost_user_input` * - vhost-user-rng - Entropy driver - :ref:`vhost_user_rng` @@ -91,6 +91,8 @@ following the :ref:`vhost_user_proto`. 
There are a number of daemons that can be built when enabled by the project although any daemon that meets the specification for a given device can be used. +.. _shared_memory_object: + Shared memory object ==================== From 87c7fb7819962e052a69046167949fe266611abf Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 4 Jan 2024 21:09:44 +0000 Subject: [PATCH 10/60] hw/virtio: Move vhost-user-input into virtio folder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vhost-user-input is in the input folder. On the other hand, the folder 'hw/virtio' maintains other virtio stubs (e.g. I2C, RNG, GPIO, etc). This patch moves vhost-user-input into the virtio folder for better code organization. No functionality change. Signed-off-by: Leo Yan Reviewed-by: Manos Pitsidianakis Message-Id: <20231120043721.50555-4-leo.yan@linaro.org> Signed-off-by: Alex Bennée Message-Id: <20240104210945.1223134-11-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- MAINTAINERS | 2 +- hw/input/meson.build | 1 - hw/virtio/meson.build | 4 +++- hw/{input => virtio}/vhost-user-input.c | 0 4 files changed, 4 insertions(+), 3 deletions(-) rename hw/{input => virtio}/vhost-user-input.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 66c9e81c55..89f2d31f70 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2290,8 +2290,8 @@ virtio-input M: Gerd Hoffmann S: Odd Fixes F: docs/system/devices/vhost-user-input.rst -F: hw/input/vhost-user-input.c F: hw/input/virtio-input*.c +F: hw/virtio/vhost-user-input.c F: include/hw/virtio/virtio-input.h F: contrib/vhost-user-input/* diff --git a/hw/input/meson.build b/hw/input/meson.build index 640556bbbc..3cc8ab85f0 100644 --- a/hw/input/meson.build +++ b/hw/input/meson.build @@ -11,7 +11,6 @@ system_ss.add(when: 'CONFIG_TSC2005', if_true: files('tsc2005.c')) system_ss.add(when: 'CONFIG_VIRTIO_INPUT', if_true: files('virtio-input.c')) system_ss.add(when: 'CONFIG_VIRTIO_INPUT', if_true: files('virtio-input-hid.c')) system_ss.add(when: 'CONFIG_VIRTIO_INPUT_HOST', if_true: files('virtio-input-host.c')) -system_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input.c')) system_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx_keypad.c')) system_ss.add(when: 'CONFIG_TSC210X', if_true: files('tsc210x.c')) diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index af8abae020..d7f18c96e6 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -25,6 +25,7 @@ if have_vhost system_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_SND', if_true: files('vhost-user-snd.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input.c')) # PCI Stubs system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) @@ -36,6 +37,8 @@ if have_vhost 
if_true: files('vhost-user-rng-pci.c')) system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SND'], if_true: files('vhost-user-snd-pci.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_INPUT'], + if_true: files('vhost-user-input-pci.c')) endif if have_vhost_vdpa system_virtio_ss.add(files('vhost-vdpa.c')) @@ -59,7 +62,6 @@ virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk-pci.c')) -virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs-pci.c')) diff --git a/hw/input/vhost-user-input.c b/hw/virtio/vhost-user-input.c similarity index 100% rename from hw/input/vhost-user-input.c rename to hw/virtio/vhost-user-input.c From bad38726e9dc52d840d151a1ba38b5614b521feb Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 4 Jan 2024 21:09:45 +0000 Subject: [PATCH 11/60] hw/virtio: derive vhost-user-input from vhost-user-base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch derives vhost-user-input from vhost-user-base class, so make the input stub as a simpler boilerplate wrapper. With the refactoring, vhost-user-input adds the property 'chardev', this leads to conflict with the vhost-user-input-pci adds the same property. To resolve the error, remove the duplicate property from vhost-user-input-pci. 
Signed-off-by: Leo Yan Reviewed-by: Manos Pitsidianakis Message-Id: <20231120043721.50555-5-leo.yan@linaro.org> Signed-off-by: Alex Bennée Message-Id: <20240104210945.1223134-12-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user-input-pci.c | 3 - hw/virtio/vhost-user-input.c | 114 +++++-------------------------- include/hw/virtio/virtio-input.h | 6 +- 3 files changed, 21 insertions(+), 102 deletions(-) diff --git a/hw/virtio/vhost-user-input-pci.c b/hw/virtio/vhost-user-input-pci.c index b858898a36..3f4761ce88 100644 --- a/hw/virtio/vhost-user-input-pci.c +++ b/hw/virtio/vhost-user-input-pci.c @@ -30,9 +30,6 @@ static void vhost_user_input_pci_instance_init(Object *obj) virtio_instance_init_common(obj, &dev->vhi, sizeof(dev->vhi), TYPE_VHOST_USER_INPUT); - - object_property_add_alias(obj, "chardev", - OBJECT(&dev->vhi), "chardev"); } static const VirtioPCIDeviceTypeInfo vhost_user_input_pci_info = { diff --git a/hw/virtio/vhost-user-input.c b/hw/virtio/vhost-user-input.c index 4ee3542106..bedec0468c 100644 --- a/hw/virtio/vhost-user-input.c +++ b/hw/virtio/vhost-user-input.c @@ -5,83 +5,25 @@ */ #include "qemu/osdep.h" -#include "qemu/error-report.h" -#include "qapi/error.h" - #include "hw/virtio/virtio-input.h" -static int vhost_input_config_change(struct vhost_dev *dev) -{ - error_report("vhost-user-input: unhandled backend config change"); - return -1; -} - -static const VhostDevConfigOps config_ops = { - .vhost_dev_config_notifier = vhost_input_config_change, +static Property vinput_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), + DEFINE_PROP_END_OF_LIST(), }; -static void vhost_input_realize(DeviceState *dev, Error **errp) +static void vinput_realize(DeviceState *dev, Error **errp) { - VHostUserInput *vhi = VHOST_USER_INPUT(dev); - VirtIOInput *vinput = VIRTIO_INPUT(dev); - VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(dev); + 
VHostUserBaseClass *vubc = VHOST_USER_BASE_GET_CLASS(dev); - vhost_dev_set_config_notifier(&vhi->vhost->dev, &config_ops); - vinput->cfg_size = sizeof_field(virtio_input_config, u); - if (vhost_user_backend_dev_init(vhi->vhost, vdev, 2, errp) == -1) { - return; - } -} + /* Fixed for input device */ + vub->virtio_id = VIRTIO_ID_INPUT; + vub->num_vqs = 2; + vub->vq_size = 4; + vub->config_size = sizeof(virtio_input_config); -static void vhost_input_change_active(VirtIOInput *vinput) -{ - VHostUserInput *vhi = VHOST_USER_INPUT(vinput); - - if (vinput->active) { - vhost_user_backend_start(vhi->vhost); - } else { - vhost_user_backend_stop(vhi->vhost); - } -} - -static void vhost_input_get_config(VirtIODevice *vdev, uint8_t *config_data) -{ - VirtIOInput *vinput = VIRTIO_INPUT(vdev); - VHostUserInput *vhi = VHOST_USER_INPUT(vdev); - Error *local_err = NULL; - int ret; - - memset(config_data, 0, vinput->cfg_size); - - ret = vhost_dev_get_config(&vhi->vhost->dev, config_data, vinput->cfg_size, - &local_err); - if (ret) { - error_report_err(local_err); - return; - } -} - -static void vhost_input_set_config(VirtIODevice *vdev, - const uint8_t *config_data) -{ - VHostUserInput *vhi = VHOST_USER_INPUT(vdev); - int ret; - - ret = vhost_dev_set_config(&vhi->vhost->dev, config_data, - 0, sizeof(virtio_input_config), - VHOST_SET_CONFIG_TYPE_FRONTEND); - if (ret) { - error_report("vhost-user-input: set device config space failed"); - return; - } - - virtio_notify_config(vdev); -} - -static struct vhost_dev *vhost_input_get_vhost(VirtIODevice *vdev) -{ - VHostUserInput *vhi = VHOST_USER_INPUT(vdev); - return &vhi->vhost->dev; + vubc->parent_realize(dev, errp); } static const VMStateDescription vmstate_vhost_input = { @@ -91,40 +33,20 @@ static const VMStateDescription vmstate_vhost_input = { static void vhost_input_class_init(ObjectClass *klass, void *data) { - VirtIOInputClass *vic = VIRTIO_INPUT_CLASS(klass); - VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + 
VHostUserBaseClass *vubc = VHOST_USER_BASE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); dc->vmsd = &vmstate_vhost_input; - vdc->get_config = vhost_input_get_config; - vdc->set_config = vhost_input_set_config; - vdc->get_vhost = vhost_input_get_vhost; - vic->realize = vhost_input_realize; - vic->change_active = vhost_input_change_active; -} - -static void vhost_input_init(Object *obj) -{ - VHostUserInput *vhi = VHOST_USER_INPUT(obj); - - vhi->vhost = VHOST_USER_BACKEND(object_new(TYPE_VHOST_USER_BACKEND)); - object_property_add_alias(obj, "chardev", - OBJECT(vhi->vhost), "chardev"); -} - -static void vhost_input_finalize(Object *obj) -{ - VHostUserInput *vhi = VHOST_USER_INPUT(obj); - - object_unref(OBJECT(vhi->vhost)); + device_class_set_props(dc, vinput_properties); + device_class_set_parent_realize(dc, vinput_realize, + &vubc->parent_realize); + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); } static const TypeInfo vhost_input_info = { .name = TYPE_VHOST_USER_INPUT, - .parent = TYPE_VIRTIO_INPUT, + .parent = TYPE_VHOST_USER_BASE, .instance_size = sizeof(VHostUserInput), - .instance_init = vhost_input_init, - .instance_finalize = vhost_input_finalize, .class_init = vhost_input_class_init, }; diff --git a/include/hw/virtio/virtio-input.h b/include/hw/virtio/virtio-input.h index a6c9703644..e69c0aeca3 100644 --- a/include/hw/virtio/virtio-input.h +++ b/include/hw/virtio/virtio-input.h @@ -1,6 +1,8 @@ #ifndef QEMU_VIRTIO_INPUT_H #define QEMU_VIRTIO_INPUT_H +#include "hw/virtio/vhost-user.h" +#include "hw/virtio/vhost-user-base.h" #include "ui/input.h" #include "sysemu/vhost-user-backend.h" @@ -97,9 +99,7 @@ struct VirtIOInputHost { }; struct VHostUserInput { - VirtIOInput parent_obj; - - VhostUserBackend *vhost; + VHostUserBase parent_obj; }; void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event); From b2101358e591c9f0a93739dd3aee72935a79af80 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 11 Jan 2024 22:43:58 +0700 Subject: [PATCH 
12/60] i386/tcg: implement x2APIC registers MSR access This commit creates apic_register_read/write which are used by both apic_mem_read/write for MMIO access and apic_msr_read/write for MSR access. The apic_msr_read/write returns -1 on error, accelerator can use this to raise the appropriate exception. Signed-off-by: Bui Quang Minh Message-Id: <20240111154404.5333-2-minhquangbui99@gmail.com> Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/intc/apic.c | 122 ++++++++++++++++++++------- hw/intc/trace-events | 4 +- include/hw/i386/apic.h | 3 + target/i386/cpu.h | 3 + target/i386/tcg/sysemu/misc_helper.c | 27 ++++++ 5 files changed, 127 insertions(+), 32 deletions(-) diff --git a/hw/intc/apic.c b/hw/intc/apic.c index ac3d47d231..7a349c0723 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode); } +bool is_x2apic_mode(DeviceState *dev) +{ + APICCommonState *s = APIC(dev); + + return s->apicbase & MSR_IA32_APICBASE_EXTD; +} + static void apic_set_base(APICCommonState *s, uint64_t val) { s->apicbase = (val & 0xfffff000) | @@ -636,24 +643,19 @@ static void apic_timer(void *opaque) apic_timer_update(s, s->next_time); } -static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size) +static int apic_register_read(int index, uint64_t *value) { DeviceState *dev; APICCommonState *s; uint32_t val; - int index; - - if (size < 4) { - return 0; - } + int ret = 0; dev = cpu_get_current_apic(); if (!dev) { - return 0; + return -1; } s = APIC(dev); - index = (addr >> 4) & 0xff; switch(index) { case 0x02: /* id */ val = s->id << 24; @@ -718,12 +720,46 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size) default: s->esr |= APIC_ESR_ILLEGAL_ADDRESS; val = 0; + ret = -1; break; } - trace_apic_mem_readl(addr, val); + + trace_apic_register_read(index, val); + *value = val; 
+ return ret; +} + +static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size) +{ + uint64_t val; + int index; + + if (size < 4) { + return 0; + } + + index = (addr >> 4) & 0xff; + apic_register_read(index, &val); + return val; } +int apic_msr_read(int index, uint64_t *val) +{ + DeviceState *dev; + + dev = cpu_get_current_apic(); + if (!dev) { + return -1; + } + + if (!is_x2apic_mode(dev)) { + return -1; + } + + return apic_register_read(index, val); +} + static void apic_send_msi(MSIMessage *msi) { uint64_t addr = msi->address; @@ -737,35 +773,18 @@ static void apic_send_msi(MSIMessage *msi) apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode); } -static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, - unsigned size) +static int apic_register_write(int index, uint64_t val) { DeviceState *dev; APICCommonState *s; - int index = (addr >> 4) & 0xff; - - if (size < 4) { - return; - } - - if (addr > 0xfff || !index) { - /* MSI and MMIO APIC are at the same memory location, - * but actually not on the global bus: MSI is on PCI bus - * APIC is connected directly to the CPU. - * Mapping them on the global bus happens to work because - * MSI registers are reserved in APIC MMIO and vice versa. 
*/ - MSIMessage msi = { .address = addr, .data = val }; - apic_send_msi(&msi); - return; - } dev = cpu_get_current_apic(); if (!dev) { - return; + return -1; } s = APIC(dev); - trace_apic_mem_writel(addr, val); + trace_apic_register_write(index, val); switch(index) { case 0x02: @@ -839,8 +858,51 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, break; default: s->esr |= APIC_ESR_ILLEGAL_ADDRESS; - break; + return -1; } + + return 0; +} + +static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + int index = (addr >> 4) & 0xff; + + if (size < 4) { + return; + } + + if (addr > 0xfff || !index) { + /* + * MSI and MMIO APIC are at the same memory location, + * but actually not on the global bus: MSI is on PCI bus + * APIC is connected directly to the CPU. + * Mapping them on the global bus happens to work because + * MSI registers are reserved in APIC MMIO and vice versa. + */ + MSIMessage msi = { .address = addr, .data = val }; + apic_send_msi(&msi); + return; + } + + apic_register_write(index, val); +} + +int apic_msr_write(int index, uint64_t val) +{ + DeviceState *dev; + + dev = cpu_get_current_apic(); + if (!dev) { + return -1; + } + + if (!is_x2apic_mode(dev)) { + return -1; + } + + return apic_register_write(index, val); } static void apic_pre_save(APICCommonState *s) diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 36ff71f947..1ef29d0256 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -14,8 +14,8 @@ cpu_get_apic_base(uint64_t val) "0x%016"PRIx64 # apic.c apic_local_deliver(int vector, uint32_t lvt) "vector %d delivery mode %d" apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, uint8_t trigger_mode) "dest %d dest_mode %d delivery_mode %d vector %d trigger_mode %d" -apic_mem_readl(uint64_t addr, uint32_t val) "0x%"PRIx64" = 0x%08x" -apic_mem_writel(uint64_t addr, uint32_t val) "0x%"PRIx64" = 0x%08x" +apic_register_read(uint8_t reg, uint64_t 
val) "register 0x%02x = 0x%"PRIx64 +apic_register_write(uint8_t reg, uint64_t val) "register 0x%02x = 0x%"PRIx64 # ioapic.c ioapic_set_remote_irr(int n) "set remote irr for pin %d" diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h index bdc15a7a73..ddea4213db 100644 --- a/include/hw/i386/apic.h +++ b/include/hw/i386/apic.h @@ -18,6 +18,9 @@ void apic_sipi(DeviceState *s); void apic_poll_irq(DeviceState *d); void apic_designate_bsp(DeviceState *d, bool bsp); int apic_get_highest_priority_irr(DeviceState *dev); +int apic_msr_read(int index, uint64_t *val); +int apic_msr_write(int index, uint64_t val); +bool is_x2apic_mode(DeviceState *d); /* pc.c */ DeviceState *cpu_get_current_apic(void); diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 6a5b180ccb..afabdeab75 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -545,6 +545,9 @@ typedef enum X86Seg { #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 #define MSR_IA32_VMX_VMFUNC 0x00000491 +#define MSR_APIC_START 0x00000800 +#define MSR_APIC_END 0x000008ff + #define XSTATE_FP_BIT 0 #define XSTATE_SSE_BIT 1 #define XSTATE_YMM_BIT 2 diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index 1ddfc9fe09..1c43a9f4f7 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -25,6 +25,7 @@ #include "exec/address-spaces.h" #include "exec/exec-all.h" #include "tcg/helper-tcg.h" +#include "hw/i386/apic.h" void helper_outb(CPUX86State *env, uint32_t port, uint32_t data) { @@ -289,6 +290,19 @@ void helper_wrmsr(CPUX86State *env) env->msr_bndcfgs = val; cpu_sync_bndcs_hflags(env); break; + case MSR_APIC_START ... 
MSR_APIC_END: { + int ret; + int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START; + + bql_lock(); + ret = apic_msr_write(index, val); + bql_unlock(); + if (ret < 0) { + goto error; + } + + break; + } default: if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + @@ -455,6 +469,19 @@ void helper_rdmsr(CPUX86State *env) val = (cs->nr_threads * cs->nr_cores) | (cs->nr_cores << 16); break; } + case MSR_APIC_START ... MSR_APIC_END: { + int ret; + int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START; + + bql_lock(); + ret = apic_msr_read(index, &val); + bql_unlock(); + if (ret < 0) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + break; + } default: if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + From b5ee0468e9d28c6bd47cce70f90b5032dd10ecc2 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 11 Jan 2024 22:43:59 +0700 Subject: [PATCH 13/60] apic: add support for x2APIC mode This commit extends the APIC ID to 32 bits and removes the 255 max APIC ID limit in the userspace APIC. The array that manages local APICs is now dynamically allocated based on the max APIC ID of the created x86 machine. Also, the new x2APIC IPI destination determination scheme, self IPI and x2APIC mode register access are supported. Signed-off-by: Bui Quang Minh Message-Id: <20240111154404.5333-3-minhquangbui99@gmail.com> Acked-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/i386/x86.c | 6 +- hw/intc/apic.c | 287 ++++++++++++++++++++++++-------- hw/intc/apic_common.c | 9 + include/hw/i386/apic.h | 3 +- include/hw/i386/apic_internal.h | 7 +- target/i386/cpu-sysemu.c | 18 +- target/i386/cpu.h | 2 + 7 files changed, 258 insertions(+), 74 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 2b6291ad8d..3d1bdd334e 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -137,7 +137,7 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) * a literal `0` in configurations where kvm_* aren't defined) */ if (kvm_enabled() && x86ms->apic_id_limit > 255 && - (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) { + kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { error_report("current -smp configuration requires kernel " "irqchip and X2APIC API support."); exit(EXIT_FAILURE); @@ -147,6 +147,10 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) kvm_set_max_apic_id(x86ms->apic_id_limit); } + if (!kvm_irqchip_in_kernel()) { + apic_set_max_apic_id(x86ms->apic_id_limit); + } + possible_cpus = mc->possible_cpu_arch_ids(ms); for (i = 0; i < ms->smp.cpus; i++) { x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); diff --git a/hw/intc/apic.c b/hw/intc/apic.c index 7a349c0723..178fb26b47 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -32,14 +32,13 @@ #include "qapi/error.h" #include "qom/object.h" -#define MAX_APICS 255 -#define MAX_APIC_WORDS 8 - #define SYNC_FROM_VAPIC 0x1 #define SYNC_TO_VAPIC 0x2 #define SYNC_ISR_IRR_TO_VAPIC 0x4 -static APICCommonState *local_apics[MAX_APICS + 1]; +static APICCommonState **local_apics; +static uint32_t max_apics; +static uint32_t max_apic_words; #define TYPE_APIC "apic" /*This is reusing the APICCommonState typedef from APIC_COMMON */ @@ -49,7 +48,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC, static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode); static void apic_update_irq(APICCommonState *s); static void 
apic_get_delivery_bitmask(uint32_t *deliver_bitmask, - uint8_t dest, uint8_t dest_mode); + uint32_t dest, uint8_t dest_mode); + +void apic_set_max_apic_id(uint32_t max_apic_id) +{ + int word_size = 32; + + /* round up the max apic id to next multiple of words */ + max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1); + + local_apics = g_malloc0(sizeof(*local_apics) * max_apics); + max_apic_words = max_apics >> 5; +} + /* Find first bit starting from msb */ static int apic_fls_bit(uint32_t value) @@ -199,10 +210,10 @@ static void apic_external_nmi(APICCommonState *s) #define foreach_apic(apic, deliver_bitmask, code) \ {\ int __i, __j;\ - for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\ + for (__i = 0; __i < max_apic_words; __i++) {\ uint32_t __mask = deliver_bitmask[__i];\ if (__mask) {\ - for(__j = 0; __j < 32; __j++) {\ + for (__j = 0; __j < 32; __j++) {\ if (__mask & (1U << __j)) {\ apic = local_apics[__i * 32 + __j];\ if (apic) {\ @@ -226,7 +237,7 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask, { int i, d; d = -1; - for(i = 0; i < MAX_APIC_WORDS; i++) { + for (i = 0; i < max_apic_words; i++) { if (deliver_bitmask[i]) { d = i * 32 + apic_ffs_bit(deliver_bitmask[i]); break; @@ -276,16 +287,18 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask, apic_set_irq(apic_iter, vector_num, trigger_mode) ); } -void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, - uint8_t vector_num, uint8_t trigger_mode) +static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode, + uint8_t delivery_mode, uint8_t vector_num, + uint8_t trigger_mode) { - uint32_t deliver_bitmask[MAX_APIC_WORDS]; + uint32_t *deliver_bitmask = g_malloc(max_apic_words * sizeof(uint32_t)); trace_apic_deliver_irq(dest, dest_mode, delivery_mode, vector_num, trigger_mode); apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode); apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode); + g_free(deliver_bitmask); } bool 
is_x2apic_mode(DeviceState *dev) @@ -442,57 +455,123 @@ static void apic_eoi(APICCommonState *s) apic_update_irq(s); } -static int apic_find_dest(uint8_t dest) +static bool apic_match_dest(APICCommonState *apic, uint32_t dest) { - APICCommonState *apic = local_apics[dest]; + if (is_x2apic_mode(&apic->parent_obj)) { + return apic->initial_apic_id == dest; + } else { + return apic->id == (uint8_t)dest; + } +} + +static void apic_find_dest(uint32_t *deliver_bitmask, uint32_t dest) +{ + APICCommonState *apic = NULL; int i; - if (apic && apic->id == dest) - return dest; /* shortcut in case apic->id == local_apics[dest]->id */ - - for (i = 0; i < MAX_APICS; i++) { + for (i = 0; i < max_apics; i++) { apic = local_apics[i]; - if (apic && apic->id == dest) - return i; - if (!apic) - break; + if (apic && apic_match_dest(apic, dest)) { + apic_set_bit(deliver_bitmask, i); + } } +} - return -1; +/* + * Deliver interrupt to x2APIC CPUs if it is x2APIC broadcast. + * Otherwise, deliver interrupt to xAPIC CPUs if it is xAPIC + * broadcast. 
+ */ +static void apic_get_broadcast_bitmask(uint32_t *deliver_bitmask, + bool is_x2apic_broadcast) +{ + int i; + APICCommonState *apic_iter; + + for (i = 0; i < max_apics; i++) { + apic_iter = local_apics[i]; + if (apic_iter) { + bool apic_in_x2apic = is_x2apic_mode(&apic_iter->parent_obj); + + if (is_x2apic_broadcast && apic_in_x2apic) { + apic_set_bit(deliver_bitmask, i); + } else if (!is_x2apic_broadcast && !apic_in_x2apic) { + apic_set_bit(deliver_bitmask, i); + } + } + } } static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, - uint8_t dest, uint8_t dest_mode) + uint32_t dest, uint8_t dest_mode) { - APICCommonState *apic_iter; + APICCommonState *apic; int i; - if (dest_mode == 0) { - if (dest == 0xff) { - memset(deliver_bitmask, 0xff, MAX_APIC_WORDS * sizeof(uint32_t)); + memset(deliver_bitmask, 0x00, max_apic_words * sizeof(uint32_t)); + + /* + * x2APIC broadcast is delivered to all x2APIC CPUs regardless of + * destination mode. In case the destination mode is physical, it is + * broadcasted to all xAPIC CPUs too. Otherwise, if the destination + * mode is logical, we need to continue checking if xAPIC CPUs accepts + * the interrupt. 
+ */ + if (dest == 0xffffffff) { + if (dest_mode == APIC_DESTMODE_PHYSICAL) { + memset(deliver_bitmask, 0xff, max_apic_words * sizeof(uint32_t)); + return; } else { - int idx = apic_find_dest(dest); - memset(deliver_bitmask, 0x00, MAX_APIC_WORDS * sizeof(uint32_t)); - if (idx >= 0) - apic_set_bit(deliver_bitmask, idx); + apic_get_broadcast_bitmask(deliver_bitmask, true); + } + } + + if (dest_mode == APIC_DESTMODE_PHYSICAL) { + apic_find_dest(deliver_bitmask, dest); + /* Any APIC in xAPIC mode will interpret 0xFF as broadcast */ + if (dest == 0xff) { + apic_get_broadcast_bitmask(deliver_bitmask, false); } } else { - /* XXX: cluster mode */ - memset(deliver_bitmask, 0x00, MAX_APIC_WORDS * sizeof(uint32_t)); - for(i = 0; i < MAX_APICS; i++) { - apic_iter = local_apics[i]; - if (apic_iter) { - if (apic_iter->dest_mode == 0xf) { - if (dest & apic_iter->log_dest) - apic_set_bit(deliver_bitmask, i); - } else if (apic_iter->dest_mode == 0x0) { - if ((dest & 0xf0) == (apic_iter->log_dest & 0xf0) && - (dest & apic_iter->log_dest & 0x0f)) { + /* XXX: logical mode */ + for (i = 0; i < max_apics; i++) { + apic = local_apics[i]; + if (apic) { + /* x2APIC logical mode */ + if (apic->apicbase & MSR_IA32_APICBASE_EXTD) { + if ((dest >> 16) == (apic->extended_log_dest >> 16) && + (dest & apic->extended_log_dest & 0xffff)) { apic_set_bit(deliver_bitmask, i); } + continue; } - } else { - break; + + /* xAPIC logical mode */ + dest = (uint8_t)dest; + if (apic->dest_mode == APIC_DESTMODE_LOGICAL_FLAT) { + if (dest & apic->log_dest) { + apic_set_bit(deliver_bitmask, i); + } + } else if (apic->dest_mode == APIC_DESTMODE_LOGICAL_CLUSTER) { + /* + * In cluster model of xAPIC logical mode IPI, 4 higher + * bits are used as cluster address, 4 lower bits are + * the bitmask for local APICs in the cluster. The IPI + * is delivered to an APIC if the cluster address + * matches and the APIC's address bit in the cluster is + * set in bitmask of destination ID in IPI. 
+ * + * The cluster address ranges from 0 - 14, the cluster + * address 15 (0xf) is the broadcast address to all + * clusters. + */ + if ((dest & 0xf0) == 0xf0 || + (dest & 0xf0) == (apic->log_dest & 0xf0)) { + if (dest & apic->log_dest & 0x0f) { + apic_set_bit(deliver_bitmask, i); + } + } + } } } } @@ -516,29 +595,36 @@ void apic_sipi(DeviceState *dev) s->wait_for_sipi = 0; } -static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode, +static void apic_deliver(DeviceState *dev, uint32_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, - uint8_t trigger_mode) + uint8_t trigger_mode, uint8_t dest_shorthand) { APICCommonState *s = APIC(dev); - uint32_t deliver_bitmask[MAX_APIC_WORDS]; - int dest_shorthand = (s->icr[0] >> 18) & 3; APICCommonState *apic_iter; + uint32_t deliver_bitmask_size = max_apic_words * sizeof(uint32_t); + uint32_t *deliver_bitmask = g_malloc(deliver_bitmask_size); + uint32_t current_apic_id; + + if (is_x2apic_mode(dev)) { + current_apic_id = s->initial_apic_id; + } else { + current_apic_id = s->id; + } switch (dest_shorthand) { case 0: apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode); break; case 1: - memset(deliver_bitmask, 0x00, sizeof(deliver_bitmask)); - apic_set_bit(deliver_bitmask, s->id); + memset(deliver_bitmask, 0x00, deliver_bitmask_size); + apic_set_bit(deliver_bitmask, current_apic_id); break; case 2: - memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask)); + memset(deliver_bitmask, 0xff, deliver_bitmask_size); break; case 3: - memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask)); - apic_reset_bit(deliver_bitmask, s->id); + memset(deliver_bitmask, 0xff, deliver_bitmask_size); + apic_reset_bit(deliver_bitmask, current_apic_id); break; } @@ -562,6 +648,7 @@ static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode, } apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode); + g_free(deliver_bitmask); } static bool apic_check_pic(APICCommonState *s) @@ 
-658,7 +745,11 @@ static int apic_register_read(int index, uint64_t *value) switch(index) { case 0x02: /* id */ - val = s->id << 24; + if (is_x2apic_mode(dev)) { + val = s->initial_apic_id; + } else { + val = s->id << 24; + } break; case 0x03: /* version */ val = s->version | ((APIC_LVT_NB - 1) << 16); @@ -681,10 +772,19 @@ static int apic_register_read(int index, uint64_t *value) val = 0; break; case 0x0d: - val = s->log_dest << 24; + if (is_x2apic_mode(dev)) { + val = s->extended_log_dest; + } else { + val = s->log_dest << 24; + } break; case 0x0e: - val = (s->dest_mode << 28) | 0xfffffff; + if (is_x2apic_mode(dev)) { + val = 0; + ret = -1; + } else { + val = (s->dest_mode << 28) | 0xfffffff; + } break; case 0x0f: val = s->spurious_vec; @@ -764,7 +864,12 @@ static void apic_send_msi(MSIMessage *msi) { uint64_t addr = msi->address; uint32_t data = msi->data; - uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + uint32_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + /* + * The higher 3 bytes of destination id is stored in higher word of + * msi address. See x86_iommu_irq_to_msi_message() + */ + dest = dest | (addr >> 32); uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; @@ -788,6 +893,10 @@ static int apic_register_write(int index, uint64_t val) switch(index) { case 0x02: + if (is_x2apic_mode(dev)) { + return -1; + } + s->id = (val >> 24); break; case 0x03: @@ -807,9 +916,17 @@ static int apic_register_write(int index, uint64_t val) apic_eoi(s); break; case 0x0d: + if (is_x2apic_mode(dev)) { + return -1; + } + s->log_dest = val >> 24; break; case 0x0e: + if (is_x2apic_mode(dev)) { + return -1; + } + s->dest_mode = val >> 28; break; case 0x0f: @@ -821,13 +938,27 @@ static int apic_register_write(int index, uint64_t val) case 0x20 ... 
0x27: case 0x28: break; - case 0x30: + case 0x30: { + uint32_t dest; + s->icr[0] = val; - apic_deliver(dev, (s->icr[1] >> 24) & 0xff, (s->icr[0] >> 11) & 1, + if (is_x2apic_mode(dev)) { + s->icr[1] = val >> 32; + dest = s->icr[1]; + } else { + dest = (s->icr[1] >> 24) & 0xff; + } + + apic_deliver(dev, dest, (s->icr[0] >> 11) & 1, (s->icr[0] >> 8) & 7, (s->icr[0] & 0xff), - (s->icr[0] >> 15) & 1); + (s->icr[0] >> 15) & 1, (s->icr[0] >> 18) & 3); break; + } case 0x31: + if (is_x2apic_mode(dev)) { + return -1; + } + s->icr[1] = val; break; case 0x32 ... 0x37: @@ -856,6 +987,23 @@ static int apic_register_write(int index, uint64_t val) s->count_shift = (v + 1) & 7; } break; + case 0x3f: { + int vector = val & 0xff; + + if (!is_x2apic_mode(dev)) { + return -1; + } + + /* + * Self IPI is identical to IPI with + * - Destination shorthand: 1 (Self) + * - Trigger mode: 0 (Edge) + * - Delivery mode: 0 (Fixed) + */ + apic_deliver(dev, 0, 0, APIC_DM_FIXED, vector, 0, 1); + + break; + } default: s->esr |= APIC_ESR_ILLEGAL_ADDRESS; return -1; @@ -933,12 +1081,6 @@ static void apic_realize(DeviceState *dev, Error **errp) { APICCommonState *s = APIC(dev); - if (s->id >= MAX_APICS) { - error_setg(errp, "%s initialization failed. APIC ID %d is invalid", - object_get_typename(OBJECT(dev)), s->id); - return; - } - if (kvm_enabled()) { warn_report("Userspace local APIC is deprecated for KVM."); warn_report("Do not use kernel-irqchip except for the -M isapc machine type."); @@ -955,7 +1097,16 @@ static void apic_realize(DeviceState *dev, Error **errp) s->io_memory.disable_reentrancy_guard = true; s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); - local_apics[s->id] = s; + + /* + * The --machine none does not call apic_set_max_apic_id before creating + * apic, so we need to call it here and set it to 1 which is the max cpus + * in machine none. 
+ */ + if (!local_apics) { + apic_set_max_apic_id(1); + } + local_apics[s->initial_apic_id] = s; msi_nonbroken = true; } @@ -965,7 +1116,7 @@ static void apic_unrealize(DeviceState *dev) APICCommonState *s = APIC(dev); timer_free(s->timer); - local_apics[s->id] = NULL; + local_apics[s->initial_apic_id] = NULL; } static void apic_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 6c100b48d6..3c43ac9a1d 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -287,6 +287,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) } vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, s, -1, 0, NULL); + + /* APIC LDR in x2APIC mode */ + s->extended_log_dest = ((s->initial_apic_id >> 4) << 16) | + (1 << (s->initial_apic_id & 0xf)); } static void apic_common_unrealize(DeviceState *dev) @@ -427,6 +431,11 @@ static void apic_common_set_id(Object *obj, Visitor *v, const char *name, return; } + if (value >= 255 && !cpu_has_x2apic_feature(&s->cpu->env)) { + error_setg(errp, "APIC ID %d requires x2APIC feature in CPU", value); + return; + } + s->initial_apic_id = value; s->id = (uint8_t)value; } diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h index ddea4213db..c8ca41ab44 100644 --- a/include/hw/i386/apic.h +++ b/include/hw/i386/apic.h @@ -3,8 +3,7 @@ /* apic.c */ -void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, - uint8_t vector_num, uint8_t trigger_mode); +void apic_set_max_apic_id(uint32_t max_apic_id); int apic_accept_pic_intr(DeviceState *s); void apic_deliver_pic_intr(DeviceState *s, int level); void apic_deliver_nmi(DeviceState *d); diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h index 5f2ba24bfc..e796e6cae3 100644 --- a/include/hw/i386/apic_internal.h +++ b/include/hw/i386/apic_internal.h @@ -46,8 +46,10 @@ #define APIC_DM_EXTINT 7 /* APIC destination mode */ -#define APIC_DESTMODE_FLAT 0xf -#define 
APIC_DESTMODE_CLUSTER 1 +#define APIC_DESTMODE_PHYSICAL 0 +#define APIC_DESTMODE_LOGICAL 1 +#define APIC_DESTMODE_LOGICAL_FLAT 0xf +#define APIC_DESTMODE_LOGICAL_CLUSTER 0 #define APIC_TRIGGER_EDGE 0 #define APIC_TRIGGER_LEVEL 1 @@ -187,6 +189,7 @@ struct APICCommonState { DeviceState *vapic; hwaddr vapic_paddr; /* note: persistence via kvmvapic */ bool legacy_instance_id; + uint32_t extended_log_dest; }; typedef struct VAPICState { diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c index 2375e48178..7422096737 100644 --- a/target/i386/cpu-sysemu.c +++ b/target/i386/cpu-sysemu.c @@ -235,6 +235,16 @@ void cpu_clear_apic_feature(CPUX86State *env) env->features[FEAT_1_EDX] &= ~CPUID_APIC; } +void cpu_set_apic_feature(CPUX86State *env) +{ + env->features[FEAT_1_EDX] |= CPUID_APIC; +} + +bool cpu_has_x2apic_feature(CPUX86State *env) +{ + return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC; +} + bool cpu_is_bsp(X86CPU *cpu) { return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP; @@ -281,11 +291,17 @@ void x86_cpu_apic_create(X86CPU *cpu, Error **errp) OBJECT(cpu->apic_state)); object_unref(OBJECT(cpu->apic_state)); - qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); /* TODO: convert to link<> */ apic = APIC_COMMON(cpu->apic_state); apic->cpu = cpu; apic->apicbase = APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE; + + /* + * apic_common_set_id needs to check if the CPU has x2APIC + * feature in case APIC ID >= 255, so we need to set apic->cpu + * before setting APIC ID + */ + qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); } void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index afabdeab75..08eaa61c56 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2239,8 +2239,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); void cpu_clear_apic_feature(CPUX86State *env); +void 
cpu_set_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); +bool cpu_has_x2apic_feature(CPUX86State *env); /* helper.c */ void x86_cpu_set_a20(X86CPU *cpu, int a20_state); From 774204cf9874e58dc7fc13394a505452357750ad Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 11 Jan 2024 22:44:00 +0700 Subject: [PATCH 14/60] apic, i386/tcg: add x2apic transitions This commit adds support for x2APIC transitions when writing to the MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to TCG_EXT_FEATURES. The set_base in APICCommonClass now returns an integer to indicate error in execution. apic_set_base returns -1 on an invalid APIC state transition; the accelerator can use this to raise the appropriate exception. Signed-off-by: Bui Quang Minh Message-Id: <20240111154404.5333-4-minhquangbui99@gmail.com> Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/kvm/apic.c | 3 +- hw/i386/xen/xen_apic.c | 3 +- hw/intc/apic.c | 62 +++++++++++++++++++++++++++- hw/intc/apic_common.c | 13 +++--- include/hw/i386/apic.h | 2 +- include/hw/i386/apic_internal.h | 2 +- target/i386/cpu.c | 9 ++-- target/i386/cpu.h | 4 ++ target/i386/tcg/sysemu/misc_helper.c | 14 ++++++- target/i386/whpx/whpx-apic.c | 3 +- 10 files changed, 96 insertions(+), 19 deletions(-) diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 1e89ca0899..a72c28e8a7 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -95,9 +95,10 @@ void kvm_get_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic) apic_next_timer(s, s->initial_count_load_time); } -static void kvm_apic_set_base(APICCommonState *s, uint64_t val) +static int kvm_apic_set_base(APICCommonState *s, uint64_t val) { s->apicbase = val; + return 0; } static void kvm_apic_set_tpr(APICCommonState *s, uint8_t val) diff --git a/hw/i386/xen/xen_apic.c b/hw/i386/xen/xen_apic.c index 7c7a60b166..101e16a766 100644 --- a/hw/i386/xen/xen_apic.c +++
b/hw/i386/xen/xen_apic.c @@ -49,8 +49,9 @@ static void xen_apic_realize(DeviceState *dev, Error **errp) msi_nonbroken = true; } -static void xen_apic_set_base(APICCommonState *s, uint64_t val) +static int xen_apic_set_base(APICCommonState *s, uint64_t val) { + return 0; } static void xen_apic_set_tpr(APICCommonState *s, uint8_t val) diff --git a/hw/intc/apic.c b/hw/intc/apic.c index 178fb26b47..1d887d66b8 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -308,8 +308,49 @@ bool is_x2apic_mode(DeviceState *dev) return s->apicbase & MSR_IA32_APICBASE_EXTD; } -static void apic_set_base(APICCommonState *s, uint64_t val) +static int apic_set_base_check(APICCommonState *s, uint64_t val) { + /* Enable x2apic when x2apic is not supported by CPU */ + if (!cpu_has_x2apic_feature(&s->cpu->env) && + val & MSR_IA32_APICBASE_EXTD) { + return -1; + } + + /* + * Transition into invalid state + * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) && + * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1 + */ + if (!(val & MSR_IA32_APICBASE_ENABLE) && + (val & MSR_IA32_APICBASE_EXTD)) { + return -1; + } + + /* Invalid transition from disabled mode to x2APIC */ + if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) && + !(s->apicbase & MSR_IA32_APICBASE_EXTD) && + (val & MSR_IA32_APICBASE_ENABLE) && + (val & MSR_IA32_APICBASE_EXTD)) { + return -1; + } + + /* Invalid transition from x2APIC to xAPIC */ + if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) && + (s->apicbase & MSR_IA32_APICBASE_EXTD) && + (val & MSR_IA32_APICBASE_ENABLE) && + !(val & MSR_IA32_APICBASE_EXTD)) { + return -1; + } + + return 0; +} + +static int apic_set_base(APICCommonState *s, uint64_t val) +{ + if (apic_set_base_check(s, val) < 0) { + return -1; + } + s->apicbase = (val & 0xfffff000) | (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE)); /* if disabled, cannot be enabled again */ @@ -318,6 +359,25 @@ static void apic_set_base(APICCommonState *s, uint64_t val) cpu_clear_apic_feature(&s->cpu->env); s->spurious_vec &= 
~APIC_SV_ENABLE; } + + /* Transition from disabled mode to xAPIC */ + if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) && + (val & MSR_IA32_APICBASE_ENABLE)) { + s->apicbase |= MSR_IA32_APICBASE_ENABLE; + cpu_set_apic_feature(&s->cpu->env); + } + + /* Transition from xAPIC to x2APIC */ + if (cpu_has_x2apic_feature(&s->cpu->env) && + !(s->apicbase & MSR_IA32_APICBASE_EXTD) && + (val & MSR_IA32_APICBASE_EXTD)) { + s->apicbase |= MSR_IA32_APICBASE_EXTD; + + s->log_dest = ((s->initial_apic_id & 0xffff0) << 16) | + (1 << (s->initial_apic_id & 0xf)); + } + + return 0; } static void apic_set_tpr(APICCommonState *s, uint8_t val) diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 3c43ac9a1d..16ab40a35f 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -35,20 +35,19 @@ bool apic_report_tpr_access; -void cpu_set_apic_base(DeviceState *dev, uint64_t val) +int cpu_set_apic_base(DeviceState *dev, uint64_t val) { trace_cpu_set_apic_base(val); if (dev) { APICCommonState *s = APIC_COMMON(dev); APICCommonClass *info = APIC_COMMON_GET_CLASS(s); - /* switching to x2APIC, reset possibly modified xAPIC ID */ - if (!(s->apicbase & MSR_IA32_APICBASE_EXTD) && - (val & MSR_IA32_APICBASE_EXTD)) { - s->id = s->initial_apic_id; - } - info->set_base(s, val); + /* Reset possibly modified xAPIC ID */ + s->id = s->initial_apic_id; + return info->set_base(s, val); } + + return 0; } uint64_t cpu_get_apic_base(DeviceState *dev) diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h index c8ca41ab44..f6e7489f2d 100644 --- a/include/hw/i386/apic.h +++ b/include/hw/i386/apic.h @@ -8,7 +8,7 @@ int apic_accept_pic_intr(DeviceState *s); void apic_deliver_pic_intr(DeviceState *s, int level); void apic_deliver_nmi(DeviceState *d); int apic_get_interrupt(DeviceState *s); -void cpu_set_apic_base(DeviceState *s, uint64_t val); +int cpu_set_apic_base(DeviceState *s, uint64_t val); uint64_t cpu_get_apic_base(DeviceState *s); void cpu_set_apic_tpr(DeviceState *s, uint8_t val); 
uint8_t cpu_get_apic_tpr(DeviceState *s); diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h index e796e6cae3..d6e85833da 100644 --- a/include/hw/i386/apic_internal.h +++ b/include/hw/i386/apic_internal.h @@ -137,7 +137,7 @@ struct APICCommonClass { DeviceRealize realize; DeviceUnrealize unrealize; - void (*set_base)(APICCommonState *s, uint64_t val); + int (*set_base)(APICCommonState *s, uint64_t val); void (*set_tpr)(APICCommonState *s, uint8_t val); uint8_t (*get_tpr)(APICCommonState *s); void (*enable_tpr_reporting)(APICCommonState *s, bool enable); diff --git a/target/i386/cpu.c b/target/i386/cpu.c index ef46755a50..2126b0e589 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -631,8 +631,8 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, * in CPL=3; remove them if they are ever implemented for system emulation. */ #if defined CONFIG_USER_ONLY -#define CPUID_EXT_KERNEL_FEATURES (CPUID_EXT_PCID | CPUID_EXT_TSC_DEADLINE_TIMER | \ - CPUID_EXT_X2APIC) +#define CPUID_EXT_KERNEL_FEATURES \ + (CPUID_EXT_PCID | CPUID_EXT_TSC_DEADLINE_TIMER) #else #define CPUID_EXT_KERNEL_FEATURES 0 #endif @@ -642,12 +642,13 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */ \ CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR | \ CPUID_EXT_RDRAND | CPUID_EXT_AVX | CPUID_EXT_F16C | \ - CPUID_EXT_FMA | CPUID_EXT_KERNEL_FEATURES) + CPUID_EXT_FMA | CPUID_EXT_X2APIC | CPUID_EXT_KERNEL_FEATURES) /* missing: CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA, - CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER */ + CPUID_EXT_TSC_DEADLINE_TIMER + */ #ifdef TARGET_X86_64 #define TCG_EXT2_X86_64_FEATURES CPUID_EXT2_LM diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 08eaa61c56..dfe43b8204 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -379,6 
+379,10 @@ typedef enum X86Seg { #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_EXTD (1 << 10) #define MSR_IA32_APICBASE_BASE (0xfffffU<<12) +#define MSR_IA32_APICBASE_RESERVED \ + (~(uint64_t)(MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE \ + | MSR_IA32_APICBASE_EXTD | MSR_IA32_APICBASE_BASE)) + #define MSR_IA32_FEATURE_CONTROL 0x0000003a #define MSR_TSC_ADJUST 0x0000003b #define MSR_IA32_SPEC_CTRL 0x48 diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index 1c43a9f4f7..7de0a6e866 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -158,9 +158,19 @@ void helper_wrmsr(CPUX86State *env) case MSR_IA32_SYSENTER_EIP: env->sysenter_eip = val; break; - case MSR_IA32_APICBASE: - cpu_set_apic_base(env_archcpu(env)->apic_state, val); + case MSR_IA32_APICBASE: { + int ret; + + if (val & MSR_IA32_APICBASE_RESERVED) { + goto error; + } + + ret = cpu_set_apic_base(env_archcpu(env)->apic_state, val); + if (ret < 0) { + goto error; + } break; + } case MSR_EFER: { uint64_t update_mask; diff --git a/target/i386/whpx/whpx-apic.c b/target/i386/whpx/whpx-apic.c index 8710e37567..7e14ded978 100644 --- a/target/i386/whpx/whpx-apic.c +++ b/target/i386/whpx/whpx-apic.c @@ -90,9 +90,10 @@ static void whpx_get_apic_state(APICCommonState *s, apic_next_timer(s, s->initial_count_load_time); } -static void whpx_apic_set_base(APICCommonState *s, uint64_t val) +static int whpx_apic_set_base(APICCommonState *s, uint64_t val) { s->apicbase = val; + return 0; } static void whpx_put_apic_base(CPUState *cpu, uint64_t val) From 2cf16205290bdb6d92bade3590adec6e08fd26c9 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 11 Jan 2024 22:44:01 +0700 Subject: [PATCH 15/60] intel_iommu: allow Extended Interrupt Mode when using userspace APIC As userspace APIC now supports x2APIC, intel interrupt remapping hardware can be set to EIM mode when userspace local APIC is used. 
Suggested-by: Joao Martins Acked-by: Peter Xu Signed-off-by: Bui Quang Minh Message-Id: <20240111154404.5333-5-minhquangbui99@gmail.com> Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 1a07faddb4..cf933189d3 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -4124,11 +4124,7 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) { - if (!kvm_irqchip_is_split()) { - error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split"); - return false; - } - if (kvm_enabled() && !kvm_enable_x2apic()) { + if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) { error_setg(errp, "eim=on requires support on the KVM side" "(X2APIC_API, first shipped in v4.7)"); return false; From 595cd6fd9dffe51ef3fdb3077979a87ff2947b1f Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 11 Jan 2024 22:44:02 +0700 Subject: [PATCH 16/60] test: bios-tables-test: prepare IVRS change in ACPI table Following the instructions in bios-tables-test, this lists that IVRS.ivrs in ACPI table will be changed to add new IVHD type 0x11. Signed-off-by: Bui Quang Minh Message-Id: <20240111154404.5333-6-minhquangbui99@gmail.com> Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..ac420db6b7 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,2 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/q35/IVRS.ivrs", From 328a11a08a70ca9e565cee807eb74e1e59e1b5d9 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 11 Jan 2024 22:44:03 +0700 Subject: [PATCH 17/60] amd_iommu: report x2APIC support to the operating system This commit adds XTSup configuration to let the user choose whether to enable this feature or not. When XTSup is enabled, additional bytes in IRTE with enabled guest virtual VAPIC are used to support 32-bit destination id. Additionally, this commit exports IVHD type 0x11 besides the old IVHD type 0x10 in ACPI table. IVHD type 0x10 does not report the full set of IOMMU features, only the legacy ones, so an operating system (e.g. Linux) may only detect x2APIC support if IVHD type 0x11 is available. The IVHD type 0x10 is kept so that old operating systems that only parse type 0x10 can detect the IOMMU device. Besides, an amd_iommu-stub.c file is created to provide the definition for amdvi_extended_feature_register when CONFIG_AMD_IOMMU=n. This function is used by acpi-build.c to get the extended feature register value for building the ACPI table. When CONFIG_AMD_IOMMU=y, this function is defined in amd_iommu.c. Signed-off-by: Bui Quang Minh Message-Id: <20240111154404.5333-7-minhquangbui99@gmail.com> Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/acpi-build.c | 107 +++++++++++++++++++++++++-------------- hw/i386/amd_iommu-stub.c | 26 ++++++++++ hw/i386/amd_iommu.c | 29 +++++++++-- hw/i386/amd_iommu.h | 16 ++++-- hw/i386/meson.build | 3 +- 5 files changed, 134 insertions(+), 47 deletions(-) create mode 100644 hw/i386/amd_iommu-stub.c diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index e990b0ae92..d5c6c94474 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2333,30 +2333,23 @@ static void build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { - int ivhd_table_len = 24; AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default()); GArray *ivhd_blob = g_array_new(false, true, 1); AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id, .oem_table_id = oem_table_id }; + uint64_t feature_report; acpi_table_begin(&table, table_data); /* IVinfo - IO virtualization information common to all * IOMMU units in a system */ - build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4); + build_append_int_noprefix(table_data, + (1UL << 0) | /* EFRSup */ + (40UL << 8), /* PASize */ + 4); /* reserved */ build_append_int_noprefix(table_data, 0, 8); - /* IVHD definition - type 10h */ - build_append_int_noprefix(table_data, 0x10, 1); - /* virtualization flags */ - build_append_int_noprefix(table_data, - (1UL << 0) | /* HtTunEn */ - (1UL << 4) | /* iotblSup */ - (1UL << 6) | /* PrefSup */ - (1UL << 7), /* PPRSup */ - 1); - /* * A PCI bus walk, for each PCI host bridge, is necessary to create a * complete set of IVHD entries. Do this into a separate blob so that we @@ -2376,18 +2369,34 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, build_append_int_noprefix(ivhd_blob, 0x0000001, 4); } - ivhd_table_len += ivhd_blob->len; - /* * When interrupt remapping is supported, we add a special IVHD device - * for type IO-APIC. 
+ * for type IO-APIC + * Refer to spec - Table 95: IVHD device entry type codes + * + * Linux IOMMU driver checks for the special IVHD device (type IO-APIC). + * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059' */ if (x86_iommu_ir_supported(x86_iommu_get_default())) { - ivhd_table_len += 8; + build_append_int_noprefix(ivhd_blob, + (0x1ull << 56) | /* type IOAPIC */ + (IOAPIC_SB_DEVID << 40) | /* IOAPIC devid */ + 0x48, /* special device */ + 8); } + /* IVHD definition - type 10h */ + build_append_int_noprefix(table_data, 0x10, 1); + /* virtualization flags */ + build_append_int_noprefix(table_data, + (1UL << 0) | /* HtTunEn */ + (1UL << 4) | /* iotblSup */ + (1UL << 6) | /* PrefSup */ + (1UL << 7), /* PPRSup */ + 1); + /* IVHD length */ - build_append_int_noprefix(table_data, ivhd_table_len, 2); + build_append_int_noprefix(table_data, ivhd_blob->len + 24, 2); /* DeviceID */ build_append_int_noprefix(table_data, object_property_get_int(OBJECT(&s->pci), "addr", @@ -2401,31 +2410,53 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, /* IOMMU info */ build_append_int_noprefix(table_data, 0, 2); /* IOMMU Feature Reporting */ - build_append_int_noprefix(table_data, - (48UL << 30) | /* HATS */ - (48UL << 28) | /* GATS */ - (1UL << 2) | /* GTSup */ - (1UL << 6), /* GASup */ - 4); + feature_report = (48UL << 30) | /* HATS */ + (48UL << 28) | /* GATS */ + (1UL << 2) | /* GTSup */ + (1UL << 6); /* GASup */ + if (s->xtsup) { + feature_report |= (1UL << 0); /* XTSup */ + } + build_append_int_noprefix(table_data, feature_report, 4); /* IVHD entries as found above */ g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len); - g_array_free(ivhd_blob, TRUE); - /* - * Add a special IVHD device type. - * Refer to spec - Table 95: IVHD device entry type codes - * - * Linux IOMMU driver checks for the special IVHD device (type IO-APIC). 
- * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059' - */ - if (x86_iommu_ir_supported(x86_iommu_get_default())) { - build_append_int_noprefix(table_data, - (0x1ull << 56) | /* type IOAPIC */ - (IOAPIC_SB_DEVID << 40) | /* IOAPIC devid */ - 0x48, /* special device */ - 8); - } + /* IVHD definition - type 11h */ + build_append_int_noprefix(table_data, 0x11, 1); + /* virtualization flags */ + build_append_int_noprefix(table_data, + (1UL << 0) | /* HtTunEn */ + (1UL << 4), /* iotblSup */ + 1); + + /* IVHD length */ + build_append_int_noprefix(table_data, ivhd_blob->len + 40, 2); + /* DeviceID */ + build_append_int_noprefix(table_data, + object_property_get_int(OBJECT(&s->pci), "addr", + &error_abort), 2); + /* Capability offset */ + build_append_int_noprefix(table_data, s->pci.capab_offset, 2); + /* IOMMU base address */ + build_append_int_noprefix(table_data, s->mmio.addr, 8); + /* PCI Segment Group */ + build_append_int_noprefix(table_data, 0, 2); + /* IOMMU info */ + build_append_int_noprefix(table_data, 0, 2); + /* IOMMU Attributes */ + build_append_int_noprefix(table_data, 0, 4); + /* EFR Register Image */ + build_append_int_noprefix(table_data, + amdvi_extended_feature_register(s), + 8); + /* EFR Register Image 2 */ + build_append_int_noprefix(table_data, 0, 8); + + /* IVHD entries as found above */ + g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len); + + g_array_free(ivhd_blob, TRUE); acpi_table_end(linker, &table); } diff --git a/hw/i386/amd_iommu-stub.c b/hw/i386/amd_iommu-stub.c new file mode 100644 index 0000000000..d62a3732e6 --- /dev/null +++ b/hw/i386/amd_iommu-stub.c @@ -0,0 +1,26 @@ +/* + * Stubs for AMD IOMMU emulation + * + * Copyright (C) 2023 Bui Quang Minh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "amd_iommu.h" + +uint64_t amdvi_extended_feature_register(AMDVIState *s) +{ + return AMDVI_DEFAULT_EXT_FEATURES; +} diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 4203144da9..7329553ad3 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -31,6 +31,7 @@ #include "hw/i386/apic_internal.h" #include "trace.h" #include "hw/i386/apic-msidef.h" +#include "hw/qdev-properties.h" /* used AMD-Vi MMIO registers */ const char *amdvi_mmio_low[] = { @@ -74,6 +75,16 @@ typedef struct AMDVIIOTLBEntry { uint64_t page_mask; /* physical page size */ } AMDVIIOTLBEntry; +uint64_t amdvi_extended_feature_register(AMDVIState *s) +{ + uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES; + if (s->xtsup) { + feature |= AMDVI_FEATURE_XT; + } + + return feature; +} + /* configure MMIO registers at startup/reset */ static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val, uint64_t romask, uint64_t w1cmask) @@ -1155,7 +1166,12 @@ static int amdvi_int_remap_ga(AMDVIState *iommu, irq->vector = irte.hi.fields.vector; irq->dest_mode = irte.lo.fields_remap.dm; irq->redir_hint = irte.lo.fields_remap.rq_eoi; - irq->dest = irte.lo.fields_remap.destination; + if (iommu->xtsup) { + irq->dest = irte.lo.fields_remap.destination | + (irte.hi.fields.destination_hi << 24); + } else { + irq->dest = irte.lo.fields_remap.destination & 0xff; + } return 0; } @@ -1505,8 +1521,9 @@ static void amdvi_init(AMDVIState *s) /* reset MMIO */ memset(s->mmior, 0, AMDVI_MMIO_SIZE); - amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES, - 0xffffffffffffffef, 0); + amdvi_set_quad(s, 
AMDVI_MMIO_EXT_FEATURES, + amdvi_extended_feature_register(s), + 0xffffffffffffffef, 0); amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67); } @@ -1589,6 +1606,11 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) amdvi_init(s); } +static Property amdvi_properties[] = { + DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false), + DEFINE_PROP_END_OF_LIST(), +}; + static const VMStateDescription vmstate_amdvi_sysbus = { .name = "amd-iommu", .unmigratable = 1 @@ -1615,6 +1637,7 @@ static void amdvi_sysbus_class_init(ObjectClass *klass, void *data) dc->user_creatable = true; set_bit(DEVICE_CATEGORY_MISC, dc->categories); dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; + device_class_set_props(dc, amdvi_properties); } static const TypeInfo amdvi_sysbus = { diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index c5065a3e27..73619fe9ea 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -154,6 +154,7 @@ #define AMDVI_FEATURE_PREFETCH (1ULL << 0) /* page prefetch */ #define AMDVI_FEATURE_PPR (1ULL << 1) /* PPR Support */ +#define AMDVI_FEATURE_XT (1ULL << 2) /* x2APIC Support */ #define AMDVI_FEATURE_GT (1ULL << 4) /* Guest Translation */ #define AMDVI_FEATURE_IA (1ULL << 6) /* inval all support */ #define AMDVI_FEATURE_GA (1ULL << 7) /* guest VAPIC support */ @@ -173,8 +174,9 @@ #define AMDVI_IOTLB_MAX_SIZE 1024 #define AMDVI_DEVID_SHIFT 36 -/* extended feature support */ -#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \ +/* default extended feature */ +#define AMDVI_DEFAULT_EXT_FEATURES \ + (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \ AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \ AMDVI_GATS_MODE | AMDVI_HATS_MODE | AMDVI_FEATURE_GA) @@ -276,8 +278,8 @@ union irte_ga_lo { dm:1, /* ------ */ guest_mode:1, - destination:8, - rsvd_1:48; + destination:24, + rsvd_1:32; } fields_remap; }; @@ -285,7 +287,8 @@ union irte_ga_hi { uint64_t val; struct { uint64_t vector:8, - rsvd_2:56; + rsvd_2:48, + 
destination_hi:8; } fields; }; @@ -364,6 +367,9 @@ struct AMDVIState { /* Interrupt remapping */ bool ga_enabled; + bool xtsup; }; +uint64_t amdvi_extended_feature_register(AMDVIState *s); + #endif diff --git a/hw/i386/meson.build b/hw/i386/meson.build index 369c6bf823..b9c1ca39cb 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -9,7 +9,8 @@ i386_ss.add(files( i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), if_false: files('x86-iommu-stub.c')) -i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c')) +i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), + if_false: files('amd_iommu-stub.c')) i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) From 7618fffdf16e03377390e51c033f0b14d772333a Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 11 Jan 2024 22:44:04 +0700 Subject: [PATCH 18/60] test: bios-tables-test: add IVRS changed binary Following the instructions in bios-tables-test, this adds the changed IVRS.ivrs binary. New IVRS differs in length, checksum, it enables EFRSup in Virtualization Info and adds IVHD type 0x11 with the same device entries as in IVHD type 0x10. 
ASL diff: /* * Intel ACPI Component Architecture * AML/ASL+ Disassembler version 20230628 (64-bit version) * Copyright (c) 2000 - 2023 Intel Corporation * - * Disassembly of tests/data/acpi/q35/IVRS.ivrs, Wed Nov 8 21:39:58 2023 + * Disassembly of /tmp/aml-2ODND2, Wed Nov 8 21:39:58 2023 * * ACPI Data Table [IVRS] * * Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue (in hex) */ [000h 0000 004h] Signature : "IVRS" [I/O Virtualization Reporting Structure] -[004h 0004 004h] Table Length : 00000068 +[004h 0004 004h] Table Length : 000000B0 [008h 0008 001h] Revision : 01 -[009h 0009 001h] Checksum : 43 +[009h 0009 001h] Checksum : 74 [00Ah 0010 006h] Oem ID : "BOCHS " [010h 0016 008h] Oem Table ID : "BXPC " [018h 0024 004h] Oem Revision : 00000001 [01Ch 0028 004h] Asl Compiler ID : "BXPC" [020h 0032 004h] Asl Compiler Revision : 00000001 -[024h 0036 004h] Virtualization Info : 00002800 +[024h 0036 004h] Virtualization Info : 00002801 [028h 0040 008h] Reserved : 0000000000000000 [030h 0048 001h] Subtable Type : 10 [Hardware Definition Block (IVHD)] [031h 0049 001h] Flags (decoded below) : D1 HtTunEn : 1 PassPW : 0 ResPassPW : 0 Isoc Control : 0 Iotlb Support : 1 Coherent : 0 Prefetch Support : 1 PPR Support : 1 [032h 0050 002h] Length : 0038 [034h 0052 002h] DeviceId : 0010 [036h 0054 002h] Capability Offset : 0040 [038h 0056 008h] Base Address : 00000000FED80000 @@ -108,25 +108,129 @@ LINT1 Pass : 0 [060h 0096 001h] Subtable Type : 48 [Device Entry: Special Device] [061h 0097 002h] Device ID : 0000 [063h 0099 001h] Data Setting (decoded below) : 00 INITPass : 0 EIntPass : 0 NMIPass : 0 Reserved : 0 System MGMT : 0 LINT0 Pass : 0 LINT1 Pass : 0 [064h 0100 001h] Handle : 00 [065h 0101 002h] Source Used Device ID : 00A0 [067h 0103 001h] Variety : 01 -Raw Table Data: Length 104 (0x68) +[068h 0104 001h] Subtable Type : 11 [Hardware Definition Block (IVHD)] +[069h 0105 001h] Flags (decoded below) : 11 + HtTunEn : 1 + PassPW : 0 + ResPassPW : 0 + Isoc 
Control : 0 + Iotlb Support : 1 + Coherent : 0 + Prefetch Support : 0 + PPR Support : 0 +[06Ah 0106 002h] Length : 0048 +[06Ch 0108 002h] DeviceId : 0010 +[06Eh 0110 002h] Capability Offset : 0040 +[070h 0112 008h] Base Address : 00000000FED80000 +[078h 0120 002h] PCI Segment Group : 0000 +[07Ah 0122 002h] Virtualization Info : 0000 +[07Ch 0124 004h] Attributes : 00000000 +[080h 0128 008h] EFR Image : 00000000000029D3 +[088h 0136 008h] Reserved : 0000000000000000 + +[090h 0144 001h] Subtable Type : 02 [Device Entry: Select One Device] +[091h 0145 002h] Device ID : 0000 +[093h 0147 001h] Data Setting (decoded below) : 00 + INITPass : 0 + EIntPass : 0 + NMIPass : 0 + Reserved : 0 + System MGMT : 0 + LINT0 Pass : 0 + LINT1 Pass : 0 + +[094h 0148 001h] Subtable Type : 02 [Device Entry: Select One Device] +[095h 0149 002h] Device ID : 0008 +[097h 0151 001h] Data Setting (decoded below) : 00 + INITPass : 0 + EIntPass : 0 + NMIPass : 0 + Reserved : 0 + System MGMT : 0 + LINT0 Pass : 0 + LINT1 Pass : 0 + +[098h 0152 001h] Subtable Type : 02 [Device Entry: Select One Device] +[099h 0153 002h] Device ID : 0010 +[09Bh 0155 001h] Data Setting (decoded below) : 00 + INITPass : 0 + EIntPass : 0 + NMIPass : 0 + Reserved : 0 + System MGMT : 0 + LINT0 Pass : 0 + LINT1 Pass : 0 + +[09Ch 0156 001h] Subtable Type : 02 [Device Entry: Select One Device] +[09Dh 0157 002h] Device ID : 00F8 +[09Fh 0159 001h] Data Setting (decoded below) : 00 + INITPass : 0 + EIntPass : 0 + NMIPass : 0 + Reserved : 0 + System MGMT : 0 + LINT0 Pass : 0 + LINT1 Pass : 0 + +[0A0h 0160 001h] Subtable Type : 02 [Device Entry: Select One Device] +[0A1h 0161 002h] Device ID : 00FA +[0A3h 0163 001h] Data Setting (decoded below) : 00 + INITPass : 0 + EIntPass : 0 + NMIPass : 0 + Reserved : 0 + System MGMT : 0 + LINT0 Pass : 0 + LINT1 Pass : 0 + +[0A4h 0164 001h] Subtable Type : 02 [Device Entry: Select One Device] +[0A5h 0165 002h] Device ID : 00FB +[0A7h 0167 001h] Data Setting (decoded below) : 00 + INITPass : 0 + 
EIntPass : 0 + NMIPass : 0 + Reserved : 0 + System MGMT : 0 + LINT0 Pass : 0 + LINT1 Pass : 0 + +[0A8h 0168 001h] Subtable Type : 48 [Device Entry: Special Device] +[0A9h 0169 002h] Device ID : 0000 +[0ABh 0171 001h] Data Setting (decoded below) : 00 + INITPass : 0 + EIntPass : 0 + NMIPass : 0 + Reserved : 0 + System MGMT : 0 + LINT0 Pass : 0 + LINT1 Pass : 0 +[0ACh 0172 001h] Handle : 00 +[0ADh 0173 002h] Source Used Device ID : 00A0 +[0AFh 0175 001h] Variety : 01 + +Raw Table Data: Length 176 (0xB0) - 0000: 49 56 52 53 68 00 00 00 01 43 42 4F 43 48 53 20 // IVRSh....CBOCHS + 0000: 49 56 52 53 B0 00 00 00 01 74 42 4F 43 48 53 20 // IVRS.....tBOCHS 0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43 // BXPC ....BXPC - 0020: 01 00 00 00 00 28 00 00 00 00 00 00 00 00 00 00 // .....(.......... + 0020: 01 00 00 00 01 28 00 00 00 00 00 00 00 00 00 00 // .....(.......... 0030: 10 D1 38 00 10 00 40 00 00 00 D8 FE 00 00 00 00 // ..8...@......... 0040: 00 00 00 00 44 00 00 00 02 00 00 00 02 08 00 00 // ....D........... 0050: 02 10 00 00 02 F8 00 00 02 FA 00 00 02 FB 00 00 // ................ - 0060: 48 00 00 00 00 A0 00 01 // H....... + 0060: 48 00 00 00 00 A0 00 01 11 11 48 00 10 00 40 00 // H.........H...@. + 0070: 00 00 D8 FE 00 00 00 00 00 00 00 00 00 00 00 00 // ................ + 0080: D3 29 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // .).............. + 0090: 02 00 00 00 02 08 00 00 02 10 00 00 02 F8 00 00 // ................ + 00A0: 02 FA 00 00 02 FB 00 00 48 00 00 00 00 A0 00 01 // ........H....... Signed-off-by: Bui Quang Minh Message-Id: <20240111154404.5333-8-minhquangbui99@gmail.com> Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/data/acpi/q35/IVRS.ivrs | Bin 104 -> 176 bytes tests/qtest/bios-tables-test-allowed-diff.h | 1 - 2 files changed, 1 deletion(-) diff --git a/tests/data/acpi/q35/IVRS.ivrs b/tests/data/acpi/q35/IVRS.ivrs index 17611202e53a32f7da8e4925d6955b384670b8b1..7f9e91aabc0b7777a7efc9f219587a4f91f0edb1 100644 GIT binary patch delta 63 zcmd1Uz{uqp78JaJfq{XsWFnUoBjZF>XH`K#4+a4S2cY1Me?S5bE^ES>3=9)pl>vea B3FiO+ delta 22 dcmdnMn8D>478IPpz`(%hJdsO^kzt~$GXOlo1j7IT diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index ac420db6b7..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,2 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/q35/IVRS.ivrs", From f22f3a92eb728497dcd0f43e31b9148992db99bd Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 6 Jan 2024 14:25:44 +0100 Subject: [PATCH 19/60] hw/i386/x86: Reverse if statement The if statement currently uses double negation when executing the else branch. So swap the branches and simplify the condition to make the code more comprehensible. Signed-off-by: Bernhard Beschow Message-Id: <20240106132546.21248-2-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 3d1bdd334e..505f64f89c 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -520,10 +520,10 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) CPU_FOREACH(cs) { X86CPU *cpu = X86_CPU(cs); - if (!cpu->apic_state) { - cpu_interrupt(cs, CPU_INTERRUPT_NMI); - } else { + if (cpu->apic_state) { apic_deliver_nmi(cpu->apic_state); + } else { + cpu_interrupt(cs, CPU_INTERRUPT_NMI); } } } From c2e6d7d8e7fc270a90c61944ef36574b1549ddcf Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 6 Jan 2024 14:25:45 +0100 Subject: [PATCH 20/60] hw/i386/x86: Fix PIC interrupt handling if APIC is globally disabled QEMU populates the apic_state attribute of x86 CPUs if supported by real hardware or if SMP is active. When handling interrupts, it just checks whether apic_state is populated to route the interrupt to the PIC or to the APIC. However, chapter 10.4.3 of [1] requires that: When IA32_APIC_BASE[11] is 0, the processor is functionally equivalent to an IA-32 processor without an on-chip APIC. This means that when apic_state is populated, QEMU needs to check for the MSR_IA32_APICBASE_ENABLE flag in addition. Implement this which fixes some real-world BIOSes. [1] Intel 64 and IA-32 Architectures Software Developer's Manual, Vol. 3A: System Programming Guide, Part 1 Signed-off-by: Bernhard Beschow Message-Id: <20240106132546.21248-3-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/x86.c | 4 ++-- hw/intc/apic_common.c | 13 +++++++++++++ include/hw/i386/apic.h | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 505f64f89c..684dce90e9 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -520,7 +520,7 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) CPU_FOREACH(cs) { X86CPU *cpu = X86_CPU(cs); - if (cpu->apic_state) { + if (cpu_is_apic_enabled(cpu->apic_state)) { apic_deliver_nmi(cpu->apic_state); } else { cpu_interrupt(cs, CPU_INTERRUPT_NMI); @@ -555,7 +555,7 @@ static void pic_irq_request(void *opaque, int irq, int level) X86CPU *cpu = X86_CPU(cs); trace_x86_pic_interrupt(irq, level); - if (cpu->apic_state && !kvm_irqchip_in_kernel() && + if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { CPU_FOREACH(cs) { cpu = X86_CPU(cs); diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 16ab40a35f..d8fc1e2815 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -62,6 +62,19 @@ uint64_t cpu_get_apic_base(DeviceState *dev) } } +bool cpu_is_apic_enabled(DeviceState *dev) +{ + APICCommonState *s; + + if (!dev) { + return false; + } + + s = APIC_COMMON(dev); + + return s->apicbase & MSR_IA32_APICBASE_ENABLE; +} + void cpu_set_apic_tpr(DeviceState *dev, uint8_t val) { APICCommonState *s; diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h index f6e7489f2d..eb606d6076 100644 --- a/include/hw/i386/apic.h +++ b/include/hw/i386/apic.h @@ -10,6 +10,7 @@ void apic_deliver_nmi(DeviceState *d); int apic_get_interrupt(DeviceState *s); int cpu_set_apic_base(DeviceState *s, uint64_t val); uint64_t cpu_get_apic_base(DeviceState *s); +bool cpu_is_apic_enabled(DeviceState *s); void cpu_set_apic_tpr(DeviceState *s, uint8_t val); uint8_t cpu_get_apic_tpr(DeviceState *s); void apic_init_reset(DeviceState *s); From f70c1c068dbe5bb17f34c8b9c2195cd7f707f07e Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: 
Sat, 6 Jan 2024 14:25:46 +0100 Subject: [PATCH 21/60] target/i386/cpu: Fix typo in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bernhard Beschow Reviewed-by: Alex Bennée Message-Id: <20240106132546.21248-4-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- target/i386/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 2126b0e589..0cd32a6fce 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2179,7 +2179,7 @@ static const CPUCaches epyc_genoa_cache_info = { * Conceal VM entries from PT * Enable ENCLS exiting * Mode-based execute control (XS/XU) - s TSC scaling (Skylake Server and newer) + * TSC scaling (Skylake Server and newer) * GPA translation for PT (IceLake and newer) * User wait and pause * ENCLV exiting From 271c5bb3780773008a936f424cccf181a11b592c Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:01 +0100 Subject: [PATCH 22/60] hw/block/fdc-isa: Move portio_list from FDCtrl to FDCtrlISABus FDCtrl::portio_list isn't used inside FDCtrl context but only inside FDCtrlISABus context, so move it there. Signed-off-by: Bernhard Beschow Reviewed-by: BALATON Zoltan Message-Id: <20240114123911.4877-2-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/block/fdc-internal.h | 2 -- hw/block/fdc-isa.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/block/fdc-internal.h b/hw/block/fdc-internal.h index 036392e9fc..fef2bfbbf5 100644 --- a/hw/block/fdc-internal.h +++ b/hw/block/fdc-internal.h @@ -26,7 +26,6 @@ #define HW_BLOCK_FDC_INTERNAL_H #include "exec/memory.h" -#include "exec/ioport.h" #include "hw/block/block.h" #include "hw/block/fdc.h" #include "qapi/qapi-types-block.h" @@ -140,7 +139,6 @@ struct FDCtrl { /* Timers state */ uint8_t timer0; uint8_t timer1; - PortioList portio_list; }; extern const FDFormat fd_formats[]; diff --git a/hw/block/fdc-isa.c b/hw/block/fdc-isa.c index ad0921c7d3..2d8a98ce7d 100644 --- a/hw/block/fdc-isa.c +++ b/hw/block/fdc-isa.c @@ -42,6 +42,7 @@ #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" #include "sysemu/sysemu.h" +#include "exec/ioport.h" #include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" @@ -60,6 +61,7 @@ struct FDCtrlISABus { uint32_t irq; uint32_t dma; struct FDCtrl state; + PortioList portio_list; int32_t bootindexA; int32_t bootindexB; }; @@ -91,7 +93,7 @@ static void isabus_fdc_realize(DeviceState *dev, Error **errp) FDCtrl *fdctrl = &isa->state; Error *err = NULL; - isa_register_portio_list(isadev, &fdctrl->portio_list, + isa_register_portio_list(isadev, &isa->portio_list, isa->iobase, fdc_portio_list, fdctrl, "fdc"); From ff453ce2819434d08fcaadca5d71b6e9a951ebdd Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:02 +0100 Subject: [PATCH 23/60] hw/block/fdc-sysbus: Move iomem from FDCtrl to FDCtrlSysBus FDCtrl::iomem isn't used inside FDCtrl context but only inside FDCtrlSysBus context, so move it there. Signed-off-by: Bernhard Beschow Reviewed-by: BALATON Zoltan Message-Id: <20240114123911.4877-3-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/block/fdc-internal.h | 2 -- hw/block/fdc-sysbus.c | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/block/fdc-internal.h b/hw/block/fdc-internal.h index fef2bfbbf5..e219623dc7 100644 --- a/hw/block/fdc-internal.h +++ b/hw/block/fdc-internal.h @@ -25,7 +25,6 @@ #ifndef HW_BLOCK_FDC_INTERNAL_H #define HW_BLOCK_FDC_INTERNAL_H -#include "exec/memory.h" #include "hw/block/block.h" #include "hw/block/fdc.h" #include "qapi/qapi-types-block.h" @@ -91,7 +90,6 @@ typedef struct FDrive { } FDrive; struct FDCtrl { - MemoryRegion iomem; qemu_irq irq; /* Controller state */ QEMUTimer *result_timer; diff --git a/hw/block/fdc-sysbus.c b/hw/block/fdc-sysbus.c index 266bc4d145..035bc08975 100644 --- a/hw/block/fdc-sysbus.c +++ b/hw/block/fdc-sysbus.c @@ -26,6 +26,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qom/object.h" +#include "exec/memory.h" #include "hw/sysbus.h" #include "hw/block/fdc.h" #include "migration/vmstate.h" @@ -52,6 +53,7 @@ struct FDCtrlSysBus { /*< public >*/ struct FDCtrl state; + MemoryRegion iomem; }; static uint64_t fdctrl_read_mem(void *opaque, hwaddr reg, unsigned ize) @@ -146,11 +148,11 @@ static void sysbus_fdc_common_instance_init(Object *obj) qdev_set_legacy_instance_id(dev, 0 /* io */, 2); /* FIXME */ - memory_region_init_io(&fdctrl->iomem, obj, + memory_region_init_io(&sys->iomem, obj, sbdc->use_strict_io ? &fdctrl_mem_strict_ops : &fdctrl_mem_ops, fdctrl, "fdc", 0x08); - sysbus_init_mmio(sbd, &fdctrl->iomem); + sysbus_init_mmio(sbd, &sys->iomem); sysbus_init_irq(sbd, &fdctrl->irq); qdev_init_gpio_in(dev, fdctrl_handle_tc, 1); From ee3d1f1b46e0c304ee4065b3099734158f322860 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:03 +0100 Subject: [PATCH 24/60] hw/char/parallel: Move portio_list from ParallelState to ISAParallelState ParallelState::portio_list isn't used inside ParallelState context but only inside ISAParallelState context, so move it there. 
Signed-off-by: Bernhard Beschow Reviewed-by: BALATON Zoltan Message-Id: <20240114123911.4877-4-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/char/parallel.c | 2 +- include/hw/char/parallel-isa.h | 2 ++ include/hw/char/parallel.h | 2 -- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/char/parallel.c b/hw/char/parallel.c index bd488cd7f9..c394635ada 100644 --- a/hw/char/parallel.c +++ b/hw/char/parallel.c @@ -532,7 +532,7 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) s->status = dummy; } - isa_register_portio_list(isadev, &s->portio_list, base, + isa_register_portio_list(isadev, &isa->portio_list, base, (s->hw_driver ? &isa_parallel_portio_hw_list[0] : &isa_parallel_portio_sw_list[0]), diff --git a/include/hw/char/parallel-isa.h b/include/hw/char/parallel-isa.h index d24ccecf05..3b783bd08d 100644 --- a/include/hw/char/parallel-isa.h +++ b/include/hw/char/parallel-isa.h @@ -12,6 +12,7 @@ #include "parallel.h" +#include "exec/ioport.h" #include "hw/isa/isa.h" #include "qom/object.h" @@ -25,6 +26,7 @@ struct ISAParallelState { uint32_t iobase; uint32_t isairq; ParallelState state; + PortioList portio_list; }; #endif /* HW_PARALLEL_ISA_H */ diff --git a/include/hw/char/parallel.h b/include/hw/char/parallel.h index 7b5a309a03..cfb97cc7cc 100644 --- a/include/hw/char/parallel.h +++ b/include/hw/char/parallel.h @@ -1,7 +1,6 @@ #ifndef HW_PARALLEL_H #define HW_PARALLEL_H -#include "exec/ioport.h" #include "exec/memory.h" #include "hw/isa/isa.h" #include "hw/irq.h" @@ -22,7 +21,6 @@ typedef struct ParallelState { uint32_t last_read_offset; /* For debugging */ /* Memory-mapped interface */ int it_shift; - PortioList portio_list; } ParallelState; void parallel_hds_isa_init(ISABus *bus, int n); From 4edee342f81397e8938ba7a80d1908c5103b66c8 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:04 +0100 Subject: [PATCH 25/60] exec/ioport: Resolve redundant .base attribute 
in struct MemoryRegionPortio portio_list_add_1() creates a MemoryRegionPortioList instance which holds a MemoryRegion `mr` and an array of MemoryRegionPortio elements named `ports`. Each element in the array gets assigned the same value for its .base attribute. The same value also ends up as the .addr attribute of `mr` due to the memory_region_add_subregion() call. This means that all .base attributes are the same as `mr.addr`. The only usages of MemoryRegionPortio::base were in portio_read() and portio_write(). Both functions get above MemoryRegionPortioList as their opaque parameter. In both cases find_portio() can only return one of the MemoryRegionPortio elements of the `ports` array. Due to above observation any element will have the same .base value equal to `mr.addr` which is also accessible. Hence, `mrpio->mr.addr` is equivalent to `mrp->base` and MemoryRegionPortio::base is redundant and can be removed. Signed-off-by: Bernhard Beschow Message-Id: <20240114123911.4877-5-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- include/exec/ioport.h | 1 - system/ioport.c | 13 ++++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/include/exec/ioport.h b/include/exec/ioport.h index e34f668998..95f1dc30d0 100644 --- a/include/exec/ioport.h +++ b/include/exec/ioport.h @@ -35,7 +35,6 @@ typedef struct MemoryRegionPortio { unsigned size; uint32_t (*read)(void *opaque, uint32_t address); void (*write)(void *opaque, uint32_t address, uint32_t data); - uint32_t base; /* private field */ } MemoryRegionPortio; #define PORTIO_END_OF_LIST() { } diff --git a/system/ioport.c b/system/ioport.c index 1824aa808c..a59e58b716 100644 --- a/system/ioport.c +++ b/system/ioport.c @@ -181,13 +181,13 @@ static uint64_t portio_read(void *opaque, hwaddr addr, unsigned size) data = ((uint64_t)1 << (size * 8)) - 1; if (mrp) { - data = mrp->read(mrpio->portio_opaque, mrp->base + addr); + data = mrp->read(mrpio->portio_opaque, mrpio->mr.addr + addr); } else if (size == 2) { mrp = find_portio(mrpio, addr, 1, false); if (mrp) { - data = mrp->read(mrpio->portio_opaque, mrp->base + addr); + data = mrp->read(mrpio->portio_opaque, mrpio->mr.addr + addr); if (addr + 1 < mrp->offset + mrp->len) { - data |= mrp->read(mrpio->portio_opaque, mrp->base + addr + 1) << 8; + data |= mrp->read(mrpio->portio_opaque, mrpio->mr.addr + addr + 1) << 8; } else { data |= 0xff00; } @@ -203,13 +203,13 @@ static void portio_write(void *opaque, hwaddr addr, uint64_t data, const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, true); if (mrp) { - mrp->write(mrpio->portio_opaque, mrp->base + addr, data); + mrp->write(mrpio->portio_opaque, mrpio->mr.addr + addr, data); } else if (size == 2) { mrp = find_portio(mrpio, addr, 1, true); if (mrp) { - mrp->write(mrpio->portio_opaque, mrp->base + addr, data & 0xff); + mrp->write(mrpio->portio_opaque, mrpio->mr.addr + addr, data & 0xff); if (addr + 1 < mrp->offset + mrp->len) { - mrp->write(mrpio->portio_opaque, mrp->base + addr + 1, data >> 8); + 
mrp->write(mrpio->portio_opaque, mrpio->mr.addr + addr + 1, data >> 8); } } } @@ -244,7 +244,6 @@ static void portio_list_add_1(PortioList *piolist, /* Adjust the offsets to all be zero-based for the region. */ for (i = 0; i < count; ++i) { mrpio->ports[i].offset -= off_low; - mrpio->ports[i].base = start + off_low; } /* From ad2b652341f4257e2fb7ebf3834724f91173a07a Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:05 +0100 Subject: [PATCH 26/60] exec/ioport: Add portio_list_set_address() Some SuperI/O devices such as the VIA south bridges or the PC87312 controller are able to relocate their SuperI/O functions. Add a convenience function for implementing this in the VIA south bridges. This convenience function relies on previous simplifications in exec/ioport which avoids some duplicate synchronization of I/O port base addresses. The naming of the function is inspired by its memory_region_set_address() pendant. Signed-off-by: Bernhard Beschow Message-Id: <20240114123911.4877-6-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/devel/migration/main.rst | 5 +++-- include/exec/ioport.h | 2 ++ system/ioport.c | 19 +++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst index 00b9c3d32f..9439adcf66 100644 --- a/docs/devel/migration/main.rst +++ b/docs/devel/migration/main.rst @@ -431,10 +431,10 @@ data doesn't match the stored device data well; it allows an intermediate temporary structure to be populated with migration data and then transferred to the main structure. -If you use memory API functions that update memory layout outside +If you use memory or portio_list API functions that update memory layout outside initialization (i.e., in response to a guest action), this is a strong indication that you need to call these functions in a ``post_load`` callback. 
-Examples of such memory API functions are: +Examples of such API functions are: - memory_region_add_subregion() - memory_region_del_subregion() @@ -443,6 +443,7 @@ Examples of such memory API functions are: - memory_region_set_enabled() - memory_region_set_address() - memory_region_set_alias_offset() + - portio_list_set_address() Iterative device migration -------------------------- diff --git a/include/exec/ioport.h b/include/exec/ioport.h index 95f1dc30d0..96858e5ac3 100644 --- a/include/exec/ioport.h +++ b/include/exec/ioport.h @@ -54,6 +54,7 @@ typedef struct PortioList { const struct MemoryRegionPortio *ports; Object *owner; struct MemoryRegion *address_space; + uint32_t addr; unsigned nr; struct MemoryRegion **regions; void *opaque; @@ -70,5 +71,6 @@ void portio_list_add(PortioList *piolist, struct MemoryRegion *address_space, uint32_t addr); void portio_list_del(PortioList *piolist); +void portio_list_set_address(PortioList *piolist, uint32_t addr); #endif /* IOPORT_H */ diff --git a/system/ioport.c b/system/ioport.c index a59e58b716..000e0ee1af 100644 --- a/system/ioport.c +++ b/system/ioport.c @@ -133,6 +133,7 @@ void portio_list_init(PortioList *piolist, piolist->nr = 0; piolist->regions = g_new0(MemoryRegion *, n); piolist->address_space = NULL; + piolist->addr = 0; piolist->opaque = opaque; piolist->owner = owner; piolist->name = name; @@ -282,6 +283,7 @@ void portio_list_add(PortioList *piolist, unsigned int off_low, off_high, off_last, count; piolist->address_space = address_space; + piolist->addr = start; /* Handle the first entry specially. 
*/ off_last = off_low = pio_start->offset; @@ -322,6 +324,23 @@ void portio_list_del(PortioList *piolist) } } +void portio_list_set_address(PortioList *piolist, uint32_t addr) +{ + MemoryRegionPortioList *mrpio; + unsigned i, j; + + for (i = 0; i < piolist->nr; ++i) { + mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr); + memory_region_set_address(&mrpio->mr, + mrpio->mr.addr - piolist->addr + addr); + for (j = 0; mrpio->ports[j].size; ++j) { + mrpio->ports[j].offset += addr - piolist->addr; + } + } + + piolist->addr = addr; +} + static void memory_region_portio_list_finalize(Object *obj) { MemoryRegionPortioList *mrpio = MEMORY_REGION_PORTIO_LIST(obj); From f165cdf102bb93c255c63b77617ba371d73344e0 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:06 +0100 Subject: [PATCH 27/60] exec/ioport: Add portio_list_set_enabled() Some SuperI/O devices such as the VIA south bridges or the PC87312 controller allow to enable or disable their SuperI/O functions. Add a convenience function for implementing this in the VIA south bridges. The naming of the functions is inspired by its memory_region_set_enabled() pendant. Signed-off-by: Bernhard Beschow Message-Id: <20240114123911.4877-7-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- docs/devel/migration/main.rst | 1 + include/exec/ioport.h | 1 + system/ioport.c | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst index 9439adcf66..331252a92c 100644 --- a/docs/devel/migration/main.rst +++ b/docs/devel/migration/main.rst @@ -444,6 +444,7 @@ Examples of such API functions are: - memory_region_set_address() - memory_region_set_alias_offset() - portio_list_set_address() + - portio_list_set_enabled() Iterative device migration -------------------------- diff --git a/include/exec/ioport.h b/include/exec/ioport.h index 96858e5ac3..4397f12f93 100644 --- a/include/exec/ioport.h +++ b/include/exec/ioport.h @@ -71,6 +71,7 @@ void portio_list_add(PortioList *piolist, struct MemoryRegion *address_space, uint32_t addr); void portio_list_del(PortioList *piolist); +void portio_list_set_enabled(PortioList *piolist, bool enabled); void portio_list_set_address(PortioList *piolist, uint32_t addr); #endif /* IOPORT_H */ diff --git a/system/ioport.c b/system/ioport.c index 000e0ee1af..fd551d0375 100644 --- a/system/ioport.c +++ b/system/ioport.c @@ -324,6 +324,15 @@ void portio_list_del(PortioList *piolist) } } +void portio_list_set_enabled(PortioList *piolist, bool enabled) +{ + unsigned i; + + for (i = 0; i < piolist->nr; ++i) { + memory_region_set_enabled(piolist->regions[i], enabled); + } +} + void portio_list_set_address(PortioList *piolist, uint32_t addr) { MemoryRegionPortioList *mrpio; From 8c4d239139e93268884f9d385a0966ef40db422f Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:07 +0100 Subject: [PATCH 28/60] hw/block/fdc-isa: Implement relocation and enabling/disabling for TYPE_ISA_FDC The real SuperI/O chips emulated by QEMU allow for relocating and enabling or disabling their SuperI/O functions via software. So far this is not implemented. Prepare for that by adding isa_fdc_set_{enabled,iobase}. 
Signed-off-by: Bernhard Beschow Message-Id: <20240114123911.4877-8-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/fdc-isa.c | 14 ++++++++++++++ include/hw/block/fdc.h | 3 +++ 2 files changed, 17 insertions(+) diff --git a/hw/block/fdc-isa.c b/hw/block/fdc-isa.c index 2d8a98ce7d..e43dc532af 100644 --- a/hw/block/fdc-isa.c +++ b/hw/block/fdc-isa.c @@ -192,6 +192,20 @@ static Aml *build_fdinfo_aml(int idx, FloppyDriveType type) return dev; } +void isa_fdc_set_iobase(ISADevice *fdc, hwaddr iobase) +{ + FDCtrlISABus *isa = ISA_FDC(fdc); + + fdc->ioport_id = iobase; + isa->iobase = iobase; + portio_list_set_address(&isa->portio_list, isa->iobase); +} + +void isa_fdc_set_enabled(ISADevice *fdc, bool enabled) +{ + portio_list_set_enabled(&ISA_FDC(fdc)->portio_list, enabled); +} + int cmos_get_fd_drive_type(FloppyDriveType fd0) { int val; diff --git a/include/hw/block/fdc.h b/include/hw/block/fdc.h index 35248c0837..c367c5efea 100644 --- a/include/hw/block/fdc.h +++ b/include/hw/block/fdc.h @@ -14,6 +14,9 @@ void fdctrl_init_sysbus(qemu_irq irq, hwaddr mmio_base, DriveInfo **fds); void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base, DriveInfo **fds, qemu_irq *fdc_tc); +void isa_fdc_set_iobase(ISADevice *fdc, hwaddr iobase); +void isa_fdc_set_enabled(ISADevice *fdc, bool enabled); + FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i); int cmos_get_fd_drive_type(FloppyDriveType fd0); From 7812dbc54c72f71df2644d0cec52a1e8d6b19584 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:08 +0100 Subject: [PATCH 29/60] hw/char/serial-isa: Implement relocation and enabling/disabling for TYPE_ISA_SERIAL The real SuperI/O chips emulated by QEMU allow for relocating and enabling or disabling their SuperI/O functions via software. So far this is not implemented. Prepare for that by adding isa_serial_set_{enabled,iobase}. 
Signed-off-by: Bernhard Beschow Message-Id: <20240114123911.4877-9-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/char/serial-isa.c | 14 ++++++++++++++ include/hw/char/serial.h | 2 ++ 2 files changed, 16 insertions(+) diff --git a/hw/char/serial-isa.c b/hw/char/serial-isa.c index 1c793b20f7..329b352b9a 100644 --- a/hw/char/serial-isa.c +++ b/hw/char/serial-isa.c @@ -184,3 +184,17 @@ void serial_hds_isa_init(ISABus *bus, int from, int to) } } } + +void isa_serial_set_iobase(ISADevice *serial, hwaddr iobase) +{ + ISASerialState *s = ISA_SERIAL(serial); + + serial->ioport_id = iobase; + s->iobase = iobase; + memory_region_set_address(&s->state.io, s->iobase); +} + +void isa_serial_set_enabled(ISADevice *serial, bool enabled) +{ + memory_region_set_enabled(&ISA_SERIAL(serial)->state.io, enabled); +} diff --git a/include/hw/char/serial.h b/include/hw/char/serial.h index 8ba7eca3d6..6e14099ee7 100644 --- a/include/hw/char/serial.h +++ b/include/hw/char/serial.h @@ -112,5 +112,7 @@ SerialMM *serial_mm_init(MemoryRegion *address_space, #define TYPE_ISA_SERIAL "isa-serial" void serial_hds_isa_init(ISABus *bus, int from, int to); +void isa_serial_set_iobase(ISADevice *serial, hwaddr iobase); +void isa_serial_set_enabled(ISADevice *serial, bool enabled); #endif From 1d1afd9ff7264c7ed35f3ca25cc4bf9dd82a6b06 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:09 +0100 Subject: [PATCH 30/60] hw/char/parallel-isa: Implement relocation and enabling/disabling for TYPE_ISA_PARALLEL The real SuperI/O chips emulated by QEMU allow for relocating and enabling or disabling their SuperI/O functions via software. So far this is not implemented. Prepare for that by adding isa_parallel_set_{enabled,iobase}. Signed-off-by: Bernhard Beschow Message-Id: <20240114123911.4877-10-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/char/parallel-isa.c | 14 ++++++++++++++ include/hw/char/parallel-isa.h | 3 +++ 2 files changed, 17 insertions(+) diff --git a/hw/char/parallel-isa.c b/hw/char/parallel-isa.c index ab0f879998..a5ce6ee13a 100644 --- a/hw/char/parallel-isa.c +++ b/hw/char/parallel-isa.c @@ -41,3 +41,17 @@ void parallel_hds_isa_init(ISABus *bus, int n) } } } + +void isa_parallel_set_iobase(ISADevice *parallel, hwaddr iobase) +{ + ISAParallelState *s = ISA_PARALLEL(parallel); + + parallel->ioport_id = iobase; + s->iobase = iobase; + portio_list_set_address(&s->portio_list, s->iobase); +} + +void isa_parallel_set_enabled(ISADevice *parallel, bool enabled) +{ + portio_list_set_enabled(&ISA_PARALLEL(parallel)->portio_list, enabled); +} diff --git a/include/hw/char/parallel-isa.h b/include/hw/char/parallel-isa.h index 3b783bd08d..5284b2ffec 100644 --- a/include/hw/char/parallel-isa.h +++ b/include/hw/char/parallel-isa.h @@ -29,4 +29,7 @@ struct ISAParallelState { PortioList portio_list; }; +void isa_parallel_set_iobase(ISADevice *parallel, hwaddr iobase); +void isa_parallel_set_enabled(ISADevice *parallel, bool enabled); + #endif /* HW_PARALLEL_ISA_H */ From 79a7f53065abea95b33e2212dcfb58e1de4be479 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:10 +0100 Subject: [PATCH 31/60] hw/ppc/pegasos2: Let pegasos2 machine configure SuperI/O functions This is a preparation for implementing relocation and toggling of SuperI/O functions in the VT8231 device model. Upon reset, all SuperI/O functions will be deactivated, so in case if no -bios is given, let the machine configure those functions the same way Pegasos II firmware would do. Signed-off-by: Bernhard Beschow Reviewed-by: BALATON Zoltan Message-Id: <20240114123911.4877-11-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/ppc/pegasos2.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index d84f3f977d..04d6decb2b 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -285,6 +285,12 @@ static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus, pegasos2_mv_reg_write(pm, pcicfg + 4, len, val); } +static void pegasos2_superio_write(uint8_t addr, uint8_t val) +{ + cpu_physical_memory_write(PCI1_IO_BASE + 0x3f0, &addr, 1); + cpu_physical_memory_write(PCI1_IO_BASE + 0x3f1, &val, 1); +} + static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) { Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); @@ -310,6 +316,12 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | PCI_INTERRUPT_LINE, 2, 0x9); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x50, 1, 0x6); + pegasos2_superio_write(0xf4, 0xbe); + pegasos2_superio_write(0xf6, 0xef); + pegasos2_superio_write(0xf7, 0xfc); + pegasos2_superio_write(0xf2, 0x14); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | 0x50, 1, 0x2); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | From 35a6380b4ed27f8355330b1fb0918b20c728d30e Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 14 Jan 2024 13:39:11 +0100 Subject: [PATCH 32/60] hw/isa/vt82c686: Implement relocation and toggling of SuperI/O functions The VIA south bridges are able to relocate and toggle (enable or disable) their SuperI/O functions. So far this is hardcoded such that all functions are always enabled and are located at fixed addresses. Some PC BIOSes seem to probe for I/O occupancy before activating such a function and issue an error in case of a conflict. Since the functions are currently enabled on reset, conflicts are always detected. Prevent that by implementing relocation and toggling of the SuperI/O functions. 
Note that all SuperI/O functions are now deactivated upon reset (except for VT82C686B's serial ports where Fuloong 2e's rescue-yl seems to expect them to be enabled by default). Rely on firmware to configure the functions accordingly. Signed-off-by: Bernhard Beschow Reviewed-by: BALATON Zoltan Message-Id: <20240114123911.4877-12-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/isa/vt82c686.c | 65 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c index d3e0f6d01f..485bb685b7 100644 --- a/hw/isa/vt82c686.c +++ b/hw/isa/vt82c686.c @@ -15,6 +15,9 @@ #include "qemu/osdep.h" #include "hw/isa/vt82c686.h" +#include "hw/block/fdc.h" +#include "hw/char/parallel-isa.h" +#include "hw/char/serial.h" #include "hw/pci/pci.h" #include "hw/qdev-properties.h" #include "hw/ide/pci.h" @@ -323,6 +326,17 @@ static uint64_t via_superio_cfg_read(void *opaque, hwaddr addr, unsigned size) return val; } +static void via_superio_devices_enable(ViaSuperIOState *s, uint8_t data) +{ + ISASuperIOClass *ic = ISA_SUPERIO_GET_CLASS(s); + + isa_parallel_set_enabled(s->superio.parallel[0], (data & 0x3) != 3); + for (int i = 0; i < ic->serial.count; i++) { + isa_serial_set_enabled(s->superio.serial[i], data & BIT(i + 2)); + } + isa_fdc_set_enabled(s->superio.floppy, data & BIT(4)); +} + static void via_superio_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -368,7 +382,25 @@ static void vt82c686b_superio_cfg_write(void *opaque, hwaddr addr, case 0xfd ... 0xff: /* ignore write to read only registers */ return; - /* case 0xe6 ... 
0xe8: Should set base port of parallel and serial */ + case 0xe2: + data &= 0x1f; + via_superio_devices_enable(sc, data); + break; + case 0xe3: + data &= 0xfc; + isa_fdc_set_iobase(sc->superio.floppy, data << 2); + break; + case 0xe6: + isa_parallel_set_iobase(sc->superio.parallel[0], data << 2); + break; + case 0xe7: + data &= 0xfe; + isa_serial_set_iobase(sc->superio.serial[0], data << 2); + break; + case 0xe8: + data &= 0xfe; + isa_serial_set_iobase(sc->superio.serial[1], data << 2); + break; default: qemu_log_mask(LOG_UNIMP, "via_superio_cfg: unimplemented register 0x%x\n", idx); @@ -395,9 +427,14 @@ static void vt82c686b_superio_reset(DeviceState *dev) /* Device ID */ vt82c686b_superio_cfg_write(s, 0, 0xe0, 1); vt82c686b_superio_cfg_write(s, 1, 0x3c, 1); - /* Function select - all disabled */ + /* + * Function select - only serial enabled + * Fuloong 2e's rescue-yl prints to the serial console w/o enabling it. This + * suggests that the serial ports are enabled by default, so override the + * datasheet. 
+ */ vt82c686b_superio_cfg_write(s, 0, 0xe2, 1); - vt82c686b_superio_cfg_write(s, 1, 0x03, 1); + vt82c686b_superio_cfg_write(s, 1, 0x0f, 1); /* Floppy ctrl base addr 0x3f0-7 */ vt82c686b_superio_cfg_write(s, 0, 0xe3, 1); vt82c686b_superio_cfg_write(s, 1, 0xfc, 1); @@ -465,6 +502,21 @@ static void vt8231_superio_cfg_write(void *opaque, hwaddr addr, case 0xfd: /* ignore write to read only registers */ return; + case 0xf2: + data &= 0x17; + via_superio_devices_enable(sc, data); + break; + case 0xf4: + data &= 0xfe; + isa_serial_set_iobase(sc->superio.serial[0], data << 2); + break; + case 0xf6: + isa_parallel_set_iobase(sc->superio.parallel[0], data << 2); + break; + case 0xf7: + data &= 0xfc; + isa_fdc_set_iobase(sc->superio.floppy, data << 2); + break; default: qemu_log_mask(LOG_UNIMP, "via_superio_cfg: unimplemented register 0x%x\n", idx); @@ -513,12 +565,6 @@ static void vt8231_superio_init(Object *obj) VIA_SUPERIO(obj)->io_ops = &vt8231_superio_cfg_ops; } -static uint16_t vt8231_superio_serial_iobase(ISASuperIODevice *sio, - uint8_t index) -{ - return 0x2f8; /* FIXME: This should be settable via registers f2-f4 */ -} - static void vt8231_superio_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -526,7 +572,6 @@ static void vt8231_superio_class_init(ObjectClass *klass, void *data) dc->reset = vt8231_superio_reset; sc->serial.count = 1; - sc->serial.get_iobase = vt8231_superio_serial_iobase; sc->parallel.count = 1; sc->ide.count = 0; /* emulated by via-ide */ sc->floppy.count = 1; From aa05bd9ef4073ccb72d04ad78de32916af31c7c3 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 11 Jan 2024 16:45:55 -0800 Subject: [PATCH 33/60] vhost-user.rst: Fix vring address description There is no "size" field in vring address structure. Remove it. Fixes: 5fc0e00291 ("Add vhost-user protocol documentation") Signed-off-by: Andrey Ignatov Message-Id: <20240112004555.64900-1-rdna@apple.com> Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/interop/vhost-user.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 9f1103f85a..ad6e142f23 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -148,9 +148,9 @@ Vring descriptor indices for packed virtqueues A vring address description ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -+-------+-------+------+------------+------+-----------+-----+ -| index | flags | size | descriptor | used | available | log | -+-------+-------+------+------------+------+-----------+-----+ ++-------+-------+------------+------+-----------+-----+ +| index | flags | descriptor | used | available | log | ++-------+-------+------------+------+-----------+-----+ :index: a 32-bit vring index From e8058c6d65252d920abf0bee027c455e8ffe41ff Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 18 Jan 2024 17:10:35 +0800 Subject: [PATCH 34/60] MAINTAINERS: Drop myself as VT-d maintainers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to my own limitation on bandwidth, I noticed that unfortunately I won't have time to review VT-d patches at least in the near future. Meanwhile I expect a lot of possibilities could actually happen in this area in the near future. To reflect that reality, I decided to drop myself from the VT-d role. It shouldn't affect much since we still have Jason around like usual, and Michael on top. But I assume it'll always be good if anyone would like to fill this role up. I'll still work on QEMU. So I suppose anyone can still copy me if one thinks essential. Cc: Michael S. 
Tsirkin Cc: Jason Wang Signed-off-by: Peter Xu Message-Id: <20240118091035.48178-1-peterx@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Acked-by: Jason Wang --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 89f2d31f70..7a1afb40ac 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3630,7 +3630,6 @@ F: tests/uefi-test-tools/ VT-d Emulation M: Michael S. Tsirkin -M: Peter Xu R: Jason Wang S: Supported F: hw/i386/intel_iommu.c From 9a457383ce9d309d4679b079fafb51f0a2d949aa Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 25 Jan 2024 15:37:05 +0800 Subject: [PATCH 35/60] virtio_iommu: Clear IOMMUPciBus pointer cache when system reset s->iommu_pcibus_by_bus_num is an IOMMUPciBus pointer cache indexed by bus number. The bus number may not always be a fixed value, e.g., when the guest reboots into a different kernel which assigns bus numbers with a different algorithm. This could lead to an endpoint binding to the wrong iommu MR in virtio_iommu_get_endpoint(), and then to the vfio device setting up a wrong mapping from another device. Remove the memset in virtio_iommu_device_realize() to avoid redundancy with the memset in system reset. Signed-off-by: Zhenzhong Duan Message-Id: <20240125073706.339369-2-zhenzhong.duan@intel.com> Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 8a4bd933c6..86623d55a5 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -1264,6 +1264,8 @@ static void virtio_iommu_system_reset(void *opaque) trace_virtio_iommu_system_reset(); + memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num)); + /* * config.bypass is sticky across device reset, but should be restored on * system reset @@ -1302,8 +1304,6 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config)); - memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num)); - s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, virtio_iommu_handle_command); s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); From 8a6b3f4dc95a064e88adaca86374108da0ecb38d Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 25 Jan 2024 15:37:06 +0800 Subject: [PATCH 36/60] smmu: Clear SMMUPciBus pointer cache when system reset s->smmu_pcibus_by_bus_num is a SMMUPciBus pointer cache indexed by bus number, bus number may not always be a fixed value, i.e., guest reboot to different kernel which set bus number with different algorithm. This could lead to smmu_iommu_mr() providing the wrong iommu MR. Suggested-by: Eric Auger Signed-off-by: Zhenzhong Duan Message-Id: <20240125073706.339369-3-zhenzhong.duan@intel.com> Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/arm/smmu-common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index 9a8ac45431..f58261bb81 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -675,6 +675,8 @@ static void smmu_base_reset_hold(Object *obj) { SMMUState *s = ARM_SMMU(obj); + memset(s->smmu_pcibus_by_bus_num, 0, sizeof(s->smmu_pcibus_by_bus_num)); + g_hash_table_remove_all(s->configs); g_hash_table_remove_all(s->iotlb); } From c62926f730d08450502d36548e28dd727c998ace Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 26 Jan 2024 12:01:21 +0000 Subject: [PATCH 37/60] cxl/cdat: Handle cdat table build errors The callback for building CDAT tables may return negative error codes. This was previously unhandled and will result in potentially huge allocations later on in ct3_build_cdat() Detect the negative error code and defer cdat building. Fixes: f5ee7413d592 ("hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange") Cc: Huai-Cheng Kuo Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Signed-off-by: Ira Weiny Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-cdat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/cxl/cxl-cdat.c b/hw/cxl/cxl-cdat.c index 639a2db3e1..24829cf242 100644 --- a/hw/cxl/cxl-cdat.c +++ b/hw/cxl/cxl-cdat.c @@ -63,7 +63,7 @@ static void ct3_build_cdat(CDATObject *cdat, Error **errp) cdat->built_buf_len = cdat->build_cdat_table(&cdat->built_buf, cdat->private); - if (!cdat->built_buf_len) { + if (cdat->built_buf_len <= 0) { /* Build later as not all data available yet */ cdat->to_update = true; return; From 0dbcc0ce2f7b7a98a11224add69b2f2f2b8125da Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 Jan 2024 12:01:22 +0000 Subject: [PATCH 38/60] hw/mem/cxl_type3: Drop handling of failure of g_malloc0() and g_malloc() As g_malloc0/g_malloc() will just exit QEMU on failure there is no point in checking for it failing. Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/mem/cxl_type3.c | 52 +++++++--------------------------------------- 1 file changed, 7 insertions(+), 45 deletions(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 52647b4ac7..1b92a065a3 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -42,9 +42,9 @@ enum { CT3_CDAT_NUM_ENTRIES }; -static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, - int dsmad_handle, MemoryRegion *mr, - bool is_pmem, uint64_t dpa_base) +static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, + int dsmad_handle, MemoryRegion *mr, + bool is_pmem, uint64_t dpa_base) { g_autofree CDATDsmas *dsmas = NULL; g_autofree CDATDslbis *dslbis0 = NULL; @@ -54,9 +54,6 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, g_autofree CDATDsemts *dsemts = NULL; dsmas = g_malloc(sizeof(*dsmas)); - if (!dsmas) { - return -ENOMEM; - } *dsmas = (CDATDsmas) { .header = { .type = CDAT_TYPE_DSMAS, @@ -70,9 +67,6 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, /* For now, no memory side cache, plausiblish numbers */ dslbis0 = g_malloc(sizeof(*dslbis0)); - if (!dslbis0) { - return -ENOMEM; - } *dslbis0 = (CDATDslbis) { .header = { .type = CDAT_TYPE_DSLBIS, @@ -86,9 +80,6 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, }; dslbis1 = g_malloc(sizeof(*dslbis1)); - if (!dslbis1) { - return -ENOMEM; - } *dslbis1 = (CDATDslbis) { .header = { .type = CDAT_TYPE_DSLBIS, @@ -102,9 +93,6 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, }; dslbis2 = g_malloc(sizeof(*dslbis2)); - if (!dslbis2) { - return -ENOMEM; - } *dslbis2 = (CDATDslbis) { .header = { .type = CDAT_TYPE_DSLBIS, @@ -118,9 +106,6 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, }; dslbis3 = g_malloc(sizeof(*dslbis3)); - if (!dslbis3) { - return -ENOMEM; - } *dslbis3 = (CDATDslbis) { .header = { .type = CDAT_TYPE_DSLBIS, @@ -134,9 +119,6 @@ static int 
ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, }; dsemts = g_malloc(sizeof(*dsemts)); - if (!dsemts) { - return -ENOMEM; - } *dsemts = (CDATDsemts) { .header = { .type = CDAT_TYPE_DSEMTS, @@ -159,8 +141,6 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, cdat_table[CT3_CDAT_DSLBIS2] = g_steal_pointer(&dslbis2); cdat_table[CT3_CDAT_DSLBIS3] = g_steal_pointer(&dslbis3); cdat_table[CT3_CDAT_DSEMTS] = g_steal_pointer(&dsemts); - - return 0; } static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) @@ -171,7 +151,6 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) int dsmad_handle = 0; int cur_ent = 0; int len = 0; - int rc, i; if (!ct3d->hostpmem && !ct3d->hostvmem) { return 0; @@ -194,27 +173,18 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) } table = g_malloc0(len * sizeof(*table)); - if (!table) { - return -ENOMEM; - } /* Now fill them in */ if (volatile_mr) { - rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr, - false, 0); - if (rc < 0) { - return rc; - } + ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr, + false, 0); cur_ent = CT3_CDAT_NUM_ENTRIES; } if (nonvolatile_mr) { uint64_t base = volatile_mr ? 
memory_region_size(volatile_mr) : 0; - rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++, - nonvolatile_mr, true, base); - if (rc < 0) { - goto error_cleanup; - } + ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++, + nonvolatile_mr, true, base); cur_ent += CT3_CDAT_NUM_ENTRIES; } assert(len == cur_ent); @@ -222,11 +192,6 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) *cdat_table = g_steal_pointer(&table); return len; -error_cleanup: - for (i = 0; i < cur_ent; i++) { - g_free(table[i]); - } - return rc; } static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv) @@ -1168,9 +1133,6 @@ void qmp_cxl_inject_uncorrectable_errors(const char *path, } cxl_err = g_malloc0(sizeof(*cxl_err)); - if (!cxl_err) { - return; - } cxl_err->type = cxl_err_code; while (header && header_count < 32) { From 99747b71baf278068b5938ccdc66d6c906ed437e Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 Jan 2024 12:01:23 +0000 Subject: [PATCH 39/60] hw/pci-bridge/cxl_upstream: Drop g_malloc() failure handling As a failure of g_malloc() will result in QEMU exiting, it won't return a NULL to check. As such, drop the incorrect handling of such NULL returns in the cdat table building code. Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci-bridge/cxl_upstream.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c index 36737189c6..d5341b530f 100644 --- a/hw/pci-bridge/cxl_upstream.c +++ b/hw/pci-bridge/cxl_upstream.c @@ -228,9 +228,6 @@ static int build_cdat_table(CDATSubHeader ***cdat_table, void *priv) sslbis_size = sizeof(CDATSslbis) + sizeof(*sslbis_latency->sslbe) * count; sslbis_latency = g_malloc(sslbis_size); - if (!sslbis_latency) { - return -ENOMEM; - } *sslbis_latency = (CDATSslbis) { .sslbis_header = { .header = { @@ -251,9 +248,6 @@ static int build_cdat_table(CDATSubHeader ***cdat_table, void *priv) } sslbis_bandwidth = g_malloc(sslbis_size); - if (!sslbis_bandwidth) { - return 0; - } *sslbis_bandwidth = (CDATSslbis) { .sslbis_header = { .header = { From 64fdad5e67587e88c2f1d8f294e89403856a4a31 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 26 Jan 2024 12:01:24 +0000 Subject: [PATCH 40/60] cxl/cdat: Fix header sum value in CDAT checksum The addition of the DCD support for CXL type-3 devices extended the CDAT table large enough that the checksum being returned was incorrect.[1] This was because the checksum value was using the header length field rather than each of the 4 bytes of the length field. This was previously not seen because the length of the CDAT data was less than 256 thus resulting in an equivalent checksum value. Properly calculate the checksum for the CDAT header. [1] https://lore.kernel.org/all/20231116-fix-cdat-devm-free-v1-1-b148b40707d7@intel.com/ Fixes: aba578bdace5 ("hw/cxl/cdat: CXL CDAT Data Object Exchange implementation") Cc: Huai-Cheng Kuo Signed-off-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-5-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-cdat.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/cxl/cxl-cdat.c b/hw/cxl/cxl-cdat.c index 24829cf242..2fea975671 100644 --- a/hw/cxl/cxl-cdat.c +++ b/hw/cxl/cxl-cdat.c @@ -49,6 +49,7 @@ static void ct3_build_cdat(CDATObject *cdat, Error **errp) g_autofree CDATTableHeader *cdat_header = NULL; g_autofree CDATEntry *cdat_st = NULL; uint8_t sum = 0; + uint8_t *hdr_buf; int ent, i; /* Use default table if fopen == NULL */ @@ -95,8 +96,12 @@ static void ct3_build_cdat(CDATObject *cdat, Error **errp) /* For now, no runtime updates */ cdat_header->sequence = 0; cdat_header->length += sizeof(CDATTableHeader); - sum += cdat_header->revision + cdat_header->sequence + - cdat_header->length; + + hdr_buf = (uint8_t *)cdat_header; + for (i = 0; i < sizeof(*cdat_header); i++) { + sum += hdr_buf[i]; + } + /* Sum of all bytes including checksum must be 0 */ cdat_header->checksum = ~sum + 1; From 7031ee540b7e25a8f38d7b855ed99c7c5f68200d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 26 Jan 2024 12:01:25 +0000 Subject: [PATCH 41/60] hw/cxl/mbox: Remove dead code Two functions were reported to have dead code, remove the bogus branches altogether, as well as a misplaced qemu_log call. Reported-by: Peter Maydell Reviewed-by: Fan Ni Signed-off-by: Davidlohr Bueso Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-6-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 43 +++++++++++++------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 6eff56fb1b..dc42850767 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1001,15 +1001,8 @@ static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd, cxl_dev_disable_media(&ct3d->cxl_dstate); - if (secs > 2) { - /* sanitize when done */ - return CXL_MBOX_BG_STARTED; - } else { - __do_sanitization(ct3d); - cxl_dev_enable_media(&ct3d->cxl_dstate); - - return CXL_MBOX_SUCCESS; - } + /* sanitize when done */ + return CXL_MBOX_BG_STARTED; } static CXLRetCode cmd_get_security_state(const struct cxl_cmd *cmd, @@ -1387,27 +1380,21 @@ static void bg_timercb(void *opaque) cci->bg.complete_pct = 100; cci->bg.ret_code = ret; - if (ret == CXL_MBOX_SUCCESS) { - switch (cci->bg.opcode) { - case 0x4400: /* sanitize */ - { - CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + switch (cci->bg.opcode) { + case 0x4400: /* sanitize */ + { + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); - __do_sanitization(ct3d); - cxl_dev_enable_media(&ct3d->cxl_dstate); - } - break; - case 0x4304: /* TODO: scan media */ - break; - default: - __builtin_unreachable(); - break; - } + __do_sanitization(ct3d); + cxl_dev_enable_media(&ct3d->cxl_dstate); + } + break; + case 0x4304: /* TODO: scan media */ + break; + default: + __builtin_unreachable(); + break; } - - qemu_log("Background command %04xh finished: %s\n", - cci->bg.opcode, - ret == CXL_MBOX_SUCCESS ? 
"success" : "aborted"); } else { /* estimate only */ cci->bg.complete_pct = 100 * now / total_time; From f7509f462c788a347521f90f19d623908c4fbcc5 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Fri, 26 Jan 2024 12:01:26 +0000 Subject: [PATCH 42/60] hw/cxl/device: read from register values in mdev_reg_read() In the current mdev_reg_read() implementation, it consistently returns that the Media Status is Ready (01b). This was fine until commit 25a52959f99d ("hw/cxl: Add support for device sanitation") because the media was presumed to be ready. However, as per the CXL 3.0 spec "8.2.9.8.5.1 Sanitize (Opcode 4400h)", during sanitation, the Media State should be set to Disabled (11b). The mentioned commit correctly sets it to Disabled, but mdev_reg_read() still returns Media Status as Ready. To address this, update mdev_reg_read() to read register values instead of returning dummy values. Note that __toggle_media() managed to not only write something that no one read, it did it to the wrong register storage and so changed the reported mailbox size which was definitely not the intent. That gets fixed as a side effect of allocating separate state storage for this register. Fixes: commit 25a52959f99d ("hw/cxl: Add support for device sanitation") Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-7-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-device-utils.c | 17 +++++++++++------ include/hw/cxl/cxl_device.h | 9 +++++++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c index 61a3c4dc2e..40b619ffd9 100644 --- a/hw/cxl/cxl-device-utils.c +++ b/hw/cxl/cxl-device-utils.c @@ -229,12 +229,9 @@ static void mailbox_reg_write(void *opaque, hwaddr offset, uint64_t value, static uint64_t mdev_reg_read(void *opaque, hwaddr offset, unsigned size) { - uint64_t retval = 0; + CXLDeviceState *cxl_dstate = opaque; - retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MEDIA_STATUS, 1); - retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MBOX_READY, 1); - - return retval; + return cxl_dstate->memdev_status; } static void ro_reg_write(void *opaque, hwaddr offset, uint64_t value, @@ -371,7 +368,15 @@ static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate) cxl_dstate->mbox_msi_n = msi_n; } -static void memdev_reg_init_common(CXLDeviceState *cxl_dstate) { } +static void memdev_reg_init_common(CXLDeviceState *cxl_dstate) +{ + uint64_t memdev_status_reg; + + memdev_status_reg = FIELD_DP64(0, CXL_MEM_DEV_STS, MEDIA_STATUS, 1); + memdev_status_reg = FIELD_DP64(memdev_status_reg, CXL_MEM_DEV_STS, + MBOX_READY, 1); + cxl_dstate->memdev_status = memdev_status_reg; +} void cxl_device_register_init_t3(CXLType3Dev *ct3d) { diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index befb5f884b..31d2afcd3d 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -202,6 +202,9 @@ typedef struct cxl_device_state { }; }; + /* Stash the memory device status value */ + uint64_t memdev_status; + struct { bool set; uint64_t last_set; @@ -353,8 +356,10 @@ static inline void __toggle_media(CXLDeviceState *cxl_dstate, int val) { uint64_t dev_status_reg; - dev_status_reg = FIELD_DP64(0, CXL_MEM_DEV_STS, MEDIA_STATUS, val); - cxl_dstate->mbox_reg_state64[R_CXL_MEM_DEV_STS] = dev_status_reg; + dev_status_reg = 
cxl_dstate->memdev_status; + dev_status_reg = FIELD_DP64(dev_status_reg, CXL_MEM_DEV_STS, MEDIA_STATUS, + val); + cxl_dstate->memdev_status = dev_status_reg; } #define cxl_dev_disable_media(cxlds) \ do { __toggle_media((cxlds), 0x3); } while (0) From 729d45a6af06753d3e330f589c248fe9687c5cd5 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 26 Jan 2024 12:01:27 +0000 Subject: [PATCH 43/60] hw/cxl: Pass CXLComponentState to cache_mem_ops cache_mem_ops.{read,write}() interprets opaque as CXLComponentState(cxl_cstate) instead of ComponentRegisters(cregs). Fortunately, cregs is the first member of cxl_cstate, so their values are the same. Fixes: 9e58f52d3f8 ("hw/cxl/component: Introduce CXL components (8.1.x, 8.2.5)") Reviewed-by: Fan Ni Signed-off-by: Li Zhijian Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-8-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index 29d477492b..9dfde6c0b3 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -199,7 +199,7 @@ void cxl_component_register_block_init(Object *obj, /* io registers controls link which we don't care about in QEMU */ memory_region_init_io(&cregs->io, obj, NULL, cregs, ".io", CXL2_COMPONENT_IO_REGION_SIZE); - memory_region_init_io(&cregs->cache_mem, obj, &cache_mem_ops, cregs, + memory_region_init_io(&cregs->cache_mem, obj, &cache_mem_ops, cxl_cstate, ".cache_mem", CXL2_COMPONENT_CM_REGION_SIZE); memory_region_add_subregion(&cregs->component_registers, 0, &cregs->io); From f8b02dd655cc20ca7f321c42acbffb143eb8372a Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 26 Jan 2024 12:01:28 +0000 Subject: [PATCH 44/60] hw/cxl: Pass NULL for a NULL MemoryRegionOps a NULL parameter is enough for a NULL MemoryRegionOps Reviewed-by: Fan Ni Signed-off-by: Li Zhijian 
Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-9-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index 9dfde6c0b3..5ddd47ed8d 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -197,7 +197,7 @@ void cxl_component_register_block_init(Object *obj, CXL2_COMPONENT_BLOCK_SIZE); /* io registers controls link which we don't care about in QEMU */ - memory_region_init_io(&cregs->io, obj, NULL, cregs, ".io", + memory_region_init_io(&cregs->io, obj, NULL, NULL, ".io", CXL2_COMPONENT_IO_REGION_SIZE); memory_region_init_io(&cregs->cache_mem, obj, &cache_mem_ops, cxl_cstate, ".cache_mem", CXL2_COMPONENT_CM_REGION_SIZE); From 48461825af1bdc68cfa25fa0b698c958b65f7368 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 Jan 2024 12:01:29 +0000 Subject: [PATCH 45/60] hw/mem/cxl_type3: Fix potential divide by zero reported by coverity Fixes Coverity ID 1522368. Currently error_fatal is set if interleave_ways_dec() is going to return 0 but we should handle that zero return explicitly. Reported-by: Stefan Hajnoczi Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-10-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/mem/cxl_type3.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 1b92a065a3..71fcb44613 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -794,8 +794,13 @@ static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa) } if (((uint64_t)host_addr < decoder_base) || (hpa_offset >= decoder_size)) { - dpa_base += decoder_size / - cxl_interleave_ways_dec(iw, &error_fatal); + int decoded_iw = cxl_interleave_ways_dec(iw, &error_fatal); + + if (decoded_iw == 0) { + return false; + } + + dpa_base += decoder_size / decoded_iw; continue; } From 14ec4ff3e4293635240ba5a7afe7a0f3ba447d31 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 Jan 2024 12:01:30 +0000 Subject: [PATCH 46/60] tests/acpi: Allow update of DSDT.cxl The _STA value returned currently indicates the ACPI0017 device is not enabled. Whilst this isn't a real device, setting _STA like this may prevent an OS from enumerating it correctly and hence from parsing the CEDT table. Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-11-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..9ce0f596cc 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,2 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/q35/DSDT.cxl", From d9ae5802f656f6fb53b788747ba557a826b6e740 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 Jan 2024 12:01:31 +0000 Subject: [PATCH 47/60] hw/i386: Fix _STA return value for ACPI0017 Found whilst testing a series for the linux kernel that actually bothers to check if enabled is set. 
0xB is the option used for the vast majority of DSDT entries in QEMU. It is a little odd for a device that doesn't really exist and is simply a hook to tell the OS there is a CEDT table but 0xB seems a reasonable choice and avoids the need to special case this device in the OS. Means: * Device present. * Device enabled and decoding its resources. * Not shown in UI * Functioning properly * No battery (on this device!) Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-12-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index d5c6c94474..d3ce96dd9f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1415,7 +1415,7 @@ static void build_acpi0017(Aml *table) aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017"))); method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0x01))); + aml_append(method, aml_return(aml_int(0x0B))); aml_append(dev, method); build_cxl_dsm_method(dev); From b24a981b9f1c4767aaea815e504a2c7aeb405d72 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 Jan 2024 12:01:32 +0000 Subject: [PATCH 48/60] tests/acpi: Update DSDT.cxl to reflect change _STA return value. _STA will now return 0xB (in common with most other devices) rather than not setting the bits to indicate this fake device has not been enabled, and self tests haven't passed. Signed-off-by: Jonathan Cameron Message-Id: <20240126120132.24248-13-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/data/acpi/q35/DSDT.cxl | Bin 9713 -> 9714 bytes tests/qtest/bios-tables-test-allowed-diff.h | 1 - 2 files changed, 1 deletion(-) diff --git a/tests/data/acpi/q35/DSDT.cxl b/tests/data/acpi/q35/DSDT.cxl index 145301c52af9a17242bb306c210f8a7e0f01b827..afcdc0d0ba8e41bb70ac20a78dcc8562ca0cb74b 100644 GIT binary patch delta 64 zcmez9{mGllCDuD9;-j0~S)C(%d^zGhJY9GlodY}#3=GW~ UL^$JvLmU~FaB*)wsA#|f0Fwq1RsaA1 delta 63 zcmez5{n4AtCD Date: Fri, 26 Jan 2024 12:16:32 +0000 Subject: [PATCH 49/60] hw/cxl: Update HDM Decoder capability to version 3 Part of standardizing the QEMU code on CXL r3.1. No functional changes as everything added is optional and it is set as not implemented. Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20240126121636.24611-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 10 +++++++++- include/hw/cxl/cxl_component.h | 16 ++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index 5ddd47ed8d..a55cf5a036 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -243,6 +243,14 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 1); ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, POISON_ON_ERR_CAP, 0); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 3_6_12_WAY, 0); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 16_WAY, 0); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, UIO, 0); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, + UIO_DECODER_COUNT, 0); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, MEMDATA_NXM_CAP, 0); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, + SUPPORTED_COHERENCY_MODEL, 0); /* Unknown */ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL, 
HDM_DECODER_ENABLE, 0); write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3; @@ -326,7 +334,7 @@ void cxl_component_register_init_common(uint32_t *reg_state, return; } - init_cap_reg(HDM, 5, 1); + init_cap_reg(HDM, 5, CXL_HDM_CAPABILITY_VERSION); hdm_init_common(reg_state, write_msk, type); if (caps < 5) { diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index 5227a8e833..7d3edef1bf 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -109,8 +109,9 @@ REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18) (CXL_SEC_REGISTERS_OFFSET + CXL_SEC_REGISTERS_SIZE) #define CXL_LINK_REGISTERS_SIZE 0x38 -/* 8.2.5.12 - CXL HDM Decoder Capability Structure */ -#define HDM_DECODE_MAX 10 /* 8.2.5.12.1 */ +/* CXL r3.1 Section 8.2.4.20: CXL HDM Decoder Capability Structure */ +#define HDM_DECODE_MAX 10 /* Maximum decoders for Devices */ +#define CXL_HDM_CAPABILITY_VERSION 3 #define CXL_HDM_REGISTERS_OFFSET \ (CXL_LINK_REGISTERS_OFFSET + CXL_LINK_REGISTERS_SIZE) #define CXL_HDM_REGISTERS_SIZE (0x10 + 0x20 * HDM_DECODE_MAX) @@ -133,6 +134,11 @@ REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18) FIELD(CXL_HDM_DECODER##n##_CTRL, COMMITTED, 10, 1) \ FIELD(CXL_HDM_DECODER##n##_CTRL, ERR, 11, 1) \ FIELD(CXL_HDM_DECODER##n##_CTRL, TYPE, 12, 1) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, BI, 13, 1) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, UIO, 14, 1) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, UIG, 16, 4) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, UIW, 20, 4) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, ISP, 24, 4) \ REG32(CXL_HDM_DECODER##n##_TARGET_LIST_LO, \ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x24) \ REG32(CXL_HDM_DECODER##n##_TARGET_LIST_HI, \ @@ -148,6 +154,12 @@ REG32(CXL_HDM_DECODER_CAPABILITY, CXL_HDM_REGISTERS_OFFSET) FIELD(CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_256B, 8, 1) FIELD(CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 9, 1) FIELD(CXL_HDM_DECODER_CAPABILITY, POISON_ON_ERR_CAP, 10, 1) + FIELD(CXL_HDM_DECODER_CAPABILITY, 3_6_12_WAY, 11, 
1) + FIELD(CXL_HDM_DECODER_CAPABILITY, 16_WAY, 12, 1) + FIELD(CXL_HDM_DECODER_CAPABILITY, UIO, 13, 1) + FIELD(CXL_HDM_DECODER_CAPABILITY, UIO_DECODER_COUNT, 16, 4) + FIELD(CXL_HDM_DECODER_CAPABILITY, MEMDATA_NXM_CAP, 20, 1) + FIELD(CXL_HDM_DECODER_CAPABILITY, SUPPORTED_COHERENCY_MODEL, 21, 2) REG32(CXL_HDM_DECODER_GLOBAL_CONTROL, CXL_HDM_REGISTERS_OFFSET + 4) FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, POISON_ON_ERR_EN, 0, 1) FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, HDM_DECODER_ENABLE, 1, 1) From 40ecac10c03aa74deada32a1ee7af1ad9750d483 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 Jan 2024 12:16:33 +0000 Subject: [PATCH 50/60] hw/cxl: Update link register definitions. Not actually implemented, but we need to reserve more space for the larger version of the structure in CXL r3.1. Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20240126121636.24611-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/cxl/cxl_component.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index 7d3edef1bf..2c7631bd1e 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -104,10 +104,10 @@ REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18) (CXL_RAS_REGISTERS_OFFSET + CXL_RAS_REGISTERS_SIZE) #define CXL_SEC_REGISTERS_SIZE 0 /* We don't implement 1.1 downstream ports */ -/* 8.2.5.11 - CXL Link Capability Structure */ +/* CXL r3.1 Section 8.2.4.19: CXL Link Capability Structure */ #define CXL_LINK_REGISTERS_OFFSET \ (CXL_SEC_REGISTERS_OFFSET + CXL_SEC_REGISTERS_SIZE) -#define CXL_LINK_REGISTERS_SIZE 0x38 +#define CXL_LINK_REGISTERS_SIZE 0x50 /* CXL r3.1 Section 8.2.4.20: CXL HDM Decoder Capability Structure */ #define HDM_DECODE_MAX 10 /* Maximum decoders for Devices */ From a185ff05fed2aa445f81d16a472e809d2cbea91b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 Jan 2024 
12:16:34 +0000 Subject: [PATCH 51/60] hw/cxl: Update RAS Capability Definitions for version 3. Part of bringing all of CXL emulation in line with CXL r3.1. No functional changes. Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20240126121636.24611-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 2 +- include/hw/cxl/cxl_component.h | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index a55cf5a036..848bfa20f1 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -325,7 +325,7 @@ void cxl_component_register_init_common(uint32_t *reg_state, CXL_##reg##_REGISTERS_OFFSET); \ } while (0) - init_cap_reg(RAS, 2, 2); + init_cap_reg(RAS, 2, CXL_RAS_CAPABILITY_VERSION); ras_init_common(reg_state, write_msk); init_cap_reg(LINK, 4, 2); diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index 2c7631bd1e..b5da72b789 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -60,8 +60,9 @@ CXLx_CAPABILITY_HEADER(SNOOP, 0x14) * implements. Some of these are specific to certain types of components, but * this implementation leaves enough space regardless. 
*/ -/* 8.2.5.9 - CXL RAS Capability Structure */ +/* CXL r3.1 Section 8.2.4.17: CXL RAS Capability Structure */ +#define CXL_RAS_CAPABILITY_VERSION 3 /* Give ample space for caps before this */ #define CXL_RAS_REGISTERS_OFFSET 0x80 #define CXL_RAS_REGISTERS_SIZE 0x58 @@ -95,6 +96,8 @@ REG32(CXL_RAS_COR_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET + 0xc) REG32(CXL_RAS_COR_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x10) REG32(CXL_RAS_ERR_CAP_CTRL, CXL_RAS_REGISTERS_OFFSET + 0x14) FIELD(CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER, 0, 6) + FIELD(CXL_RAS_ERR_CAP_CTRL, MULTIPLE_HEADER_RECORDING_CAP, 9, 1) + FIELD(CXL_RAS_ERR_POISON_ENABLED, POISON_ENABLED, 13, 1) REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18) #define CXL_RAS_ERR_HEADER_NUM 32 /* Offset 0x18 - 0x58 reserved for RAS logs */ From 202f651469b7a6440577cb6a985cf1eb538ea899 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 Jan 2024 12:16:35 +0000 Subject: [PATCH 52/60] hw/cxl: Update mailbox status registers. Whilst the reported version was 1 so there should be no changes, a couple of fields (where the value 0 was valid) were not defined. Make those explicit and update references to be based on CXL r3.1. Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20240126121636.24611-5-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-device-utils.c | 6 +++++- include/hw/cxl/cxl_device.h | 17 ++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c index 40b619ffd9..9df8738f86 100644 --- a/hw/cxl/cxl-device-utils.c +++ b/hw/cxl/cxl-device-utils.c @@ -366,6 +366,10 @@ static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate) ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP, MSI_N, msi_n); cxl_dstate->mbox_msi_n = msi_n; + ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP, + MBOX_READY_TIME, 0); /* Not reported */ + ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP, + TYPE, 0); /* Inferred from class code */ } static void memdev_reg_init_common(CXLDeviceState *cxl_dstate) @@ -392,7 +396,7 @@ void cxl_device_register_init_t3(CXLType3Dev *ct3d) cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1, 2); device_reg_init_common(cxl_dstate); - cxl_device_cap_init(cxl_dstate, MAILBOX, 2, 1); + cxl_device_cap_init(cxl_dstate, MAILBOX, 2, CXL_DEV_MAILBOX_VERSION); mailbox_reg_init_common(cxl_dstate); cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000, 1); diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 31d2afcd3d..9f51c586d7 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -189,7 +189,7 @@ typedef struct cxl_device_state { }; }; - /* mmio for the mailbox registers 8.2.8.4 */ + /* CXL r3.1 Section 8.2.8.4: Mailbox Registers */ struct { MemoryRegion mailbox; uint16_t payload_size; @@ -310,39 +310,42 @@ void cxl_initialize_t3_ld_cci(CXLCCI *cci, DeviceState *d, REG64(CXL_DEV_EVENT_STATUS, 0) FIELD(CXL_DEV_EVENT_STATUS, EVENT_STATUS, 0, 32) -/* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register */ +#define CXL_DEV_MAILBOX_VERSION 1 +/* CXL r3.1 Section 8.2.8.4.3: Mailbox Capabilities Register */ REG32(CXL_DEV_MAILBOX_CAP, 0) FIELD(CXL_DEV_MAILBOX_CAP, PAYLOAD_SIZE, 0, 5) FIELD(CXL_DEV_MAILBOX_CAP, 
INT_CAP, 5, 1) FIELD(CXL_DEV_MAILBOX_CAP, BG_INT_CAP, 6, 1) FIELD(CXL_DEV_MAILBOX_CAP, MSI_N, 7, 4) + FIELD(CXL_DEV_MAILBOX_CAP, MBOX_READY_TIME, 11, 8) + FIELD(CXL_DEV_MAILBOX_CAP, TYPE, 19, 4) -/* CXL 2.0 8.2.8.4.4 Mailbox Control Register */ +/* CXL r3.1 Section 8.2.8.4.4: Mailbox Control Register */ REG32(CXL_DEV_MAILBOX_CTRL, 4) FIELD(CXL_DEV_MAILBOX_CTRL, DOORBELL, 0, 1) FIELD(CXL_DEV_MAILBOX_CTRL, INT_EN, 1, 1) FIELD(CXL_DEV_MAILBOX_CTRL, BG_INT_EN, 2, 1) -/* CXL 2.0 8.2.8.4.5 Command Register */ +/* CXL r3.1 Section 8.2.8.4.5: Command Register */ REG64(CXL_DEV_MAILBOX_CMD, 8) FIELD(CXL_DEV_MAILBOX_CMD, COMMAND, 0, 8) FIELD(CXL_DEV_MAILBOX_CMD, COMMAND_SET, 8, 8) FIELD(CXL_DEV_MAILBOX_CMD, LENGTH, 16, 20) -/* CXL 2.0 8.2.8.4.6 Mailbox Status Register */ +/* CXL r3.1 Section 8.2.8.4.6: Mailbox Status Register */ REG64(CXL_DEV_MAILBOX_STS, 0x10) FIELD(CXL_DEV_MAILBOX_STS, BG_OP, 0, 1) FIELD(CXL_DEV_MAILBOX_STS, ERRNO, 32, 16) FIELD(CXL_DEV_MAILBOX_STS, VENDOR_ERRNO, 48, 16) -/* CXL 2.0 8.2.8.4.7 Background Command Status Register */ +/* CXL r3.1 Section 8.2.8.4.7: Background Command Status Register */ REG64(CXL_DEV_BG_CMD_STS, 0x18) FIELD(CXL_DEV_BG_CMD_STS, OP, 0, 16) FIELD(CXL_DEV_BG_CMD_STS, PERCENTAGE_COMP, 16, 7) FIELD(CXL_DEV_BG_CMD_STS, RET_CODE, 32, 16) FIELD(CXL_DEV_BG_CMD_STS, VENDOR_RET_CODE, 48, 16) -/* CXL 2.0 8.2.8.4.8 Command Payload Registers */ +/* CXL r3.1 Section 8.2.8.4.8: Command Payload Registers */ REG32(CXL_DEV_CMD_PAYLOAD, 0x20) REG64(CXL_MEM_DEV_STS, 0) From 8700ee15de465a55e5c7281f87618ca4b4827441 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 Jan 2024 12:16:36 +0000 Subject: [PATCH 53/60] hw/cxl: Standardize all references on CXL r3.1 and minor updates Previously not all references mentioned any spec version at all. Given r3.1 is the current specification available for evaluation at www.computeexpresslink.org update references to refer to that. Hopefully this won't become a never ending job. 
A few structure definitions have been updated to add new fields. Defaults of 0 and read only are valid choices for these new DVSEC registers so go with that for now. There are additional error codes and some of the 'questions' in the comments are resolved now. Update documentation reference to point to the CXL r3.1 specification with naming closer to what is on the cover. For cases where there are structure version numbers, add defines so they can be found next to the register definitions. Signed-off-by: Jonathan Cameron Message-Id: <20240126121636.24611-6-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/system/devices/cxl.rst | 3 +- hw/cxl/cxl-component-utils.c | 17 +++++---- hw/cxl/cxl-device-utils.c | 8 ++-- hw/cxl/cxl-events.c | 2 +- hw/cxl/cxl-mailbox-utils.c | 49 ++++++++++++++----------- hw/mem/cxl_type3.c | 6 +-- hw/pci-bridge/cxl_downstream.c | 4 +- hw/pci-bridge/cxl_root_port.c | 4 +- hw/pci-bridge/cxl_upstream.c | 4 +- include/hw/cxl/cxl_cdat.h | 10 ++--- include/hw/cxl/cxl_component.h | 26 ++++++++----- include/hw/cxl/cxl_device.h | 60 +++++++++++++++++++++--------- include/hw/cxl/cxl_events.h | 18 ++++----- include/hw/cxl/cxl_pci.h | 67 ++++++++++++++++++++++++---------- 14 files changed, 174 insertions(+), 104 deletions(-) diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst index 6ab5f72473..10a0e9bc9f 100644 --- a/docs/system/devices/cxl.rst +++ b/docs/system/devices/cxl.rst @@ -411,5 +411,4 @@ References - Consortium website for specifications etc: http://www.computeexpresslink.org - - Compute Express link Revision 2 specification, October 2020 - - CEDT CFMWS & QTG _DSM ECN May 2021 + - Compute Express Link (CXL) Specification, Revision 3.1, August 2023 diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index 848bfa20f1..84ab503325 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -13,7 +13,7 @@ #include "hw/pci/pci.h" 
#include "hw/cxl/cxl.h" -/* CXL r3.0 Section 8.2.4.19.1 CXL HDM Decoder Capability Register */ +/* CXL r3.1 Section 8.2.4.20.1 CXL HDM Decoder Capability Register */ int cxl_decoder_count_enc(int count) { switch (count) { @@ -160,11 +160,11 @@ static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value, } /* - * 8.2.3 + * CXL r3.1 Section 8.2.3: Component Register Layout and Definition * The access restrictions specified in Section 8.2.2 also apply to CXL 2.0 * Component Registers. * - * 8.2.2 + * CXL r3.1 Section 8.2.2: Accessing Component Registers * • A 32 bit register shall be accessed as a 4 Bytes quantity. Partial * reads are not permitted. * • A 64 bit register shall be accessed as a 8 Bytes quantity. Partial @@ -308,7 +308,8 @@ void cxl_component_register_init_common(uint32_t *reg_state, /* CXL Capability Header Register */ ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ID, 1); - ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, VERSION, 1); + ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, VERSION, + CXL_CAPABILITY_VERSION); ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, CACHE_MEM_VERSION, 1); ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps); @@ -328,7 +329,7 @@ void cxl_component_register_init_common(uint32_t *reg_state, init_cap_reg(RAS, 2, CXL_RAS_CAPABILITY_VERSION); ras_init_common(reg_state, write_msk); - init_cap_reg(LINK, 4, 2); + init_cap_reg(LINK, 4, CXL_LINK_CAPABILITY_VERSION); if (caps < 3) { return; @@ -341,8 +342,8 @@ void cxl_component_register_init_common(uint32_t *reg_state, return; } - init_cap_reg(EXTSEC, 6, 1); - init_cap_reg(SNOOP, 8, 1); + init_cap_reg(EXTSEC, 6, CXL_EXTSEC_CAP_VERSION); + init_cap_reg(SNOOP, 8, CXL_SNOOP_CAP_VERSION); #undef init_cap_reg } @@ -467,7 +468,7 @@ void cxl_component_create_dvsec(CXLComponentState *cxl, cxl->dvsec_offset += length; } -/* CXL r3.0 Section 8.2.4.19.7 CXL HDM Decoder n Control Register */ +/* CXL r3.1 Section 8.2.4.20.7 CXL HDM Decoder n 
Control Register */ uint8_t cxl_interleave_ways_enc(int iw, Error **errp) { switch (iw) { diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c index 9df8738f86..035d034f6d 100644 --- a/hw/cxl/cxl-device-utils.c +++ b/hw/cxl/cxl-device-utils.c @@ -13,7 +13,7 @@ /* * Device registers have no restrictions per the spec, and so fall back to the - * default memory mapped register rules in 8.2: + * default memory mapped register rules in CXL r3.1 Section 8.2: * Software shall use CXL.io Memory Read and Write to access memory mapped * register defined in this section. Unless otherwise specified, software * shall restrict the accesses width based on the following: @@ -393,13 +393,15 @@ void cxl_device_register_init_t3(CXLType3Dev *ct3d) ARRAY_FIELD_DP64(cap_h, CXL_DEV_CAP_ARRAY, CAP_VERSION, 1); ARRAY_FIELD_DP64(cap_h, CXL_DEV_CAP_ARRAY, CAP_COUNT, cap_count); - cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1, 2); + cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1, + CXL_DEVICE_STATUS_VERSION); device_reg_init_common(cxl_dstate); cxl_device_cap_init(cxl_dstate, MAILBOX, 2, CXL_DEV_MAILBOX_VERSION); mailbox_reg_init_common(cxl_dstate); - cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000, 1); + cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000, + CXL_MEM_DEV_STATUS_VERSION); memdev_reg_init_common(cxl_dstate); cxl_initialize_mailbox_t3(&ct3d->cci, DEVICE(ct3d), diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c index affcf8a34d..d397718b1b 100644 --- a/hw/cxl/cxl-events.c +++ b/hw/cxl/cxl-events.c @@ -204,7 +204,7 @@ CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, * record that will not be cleared when Clear Event Records is executed, * the device shall return the Invalid Handle return code and shall not * clear any of the specified event records." 
- * -- CXL 3.0 8.2.9.2.3 + * -- CXL r3.1 Section 8.2.9.2.3: Clear Event Records (0101h) */ entry = cxl_event_get_head(log); for (nr = 0; entry && nr < pl->nr_recs; nr++) { diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index dc42850767..e5eb97cb91 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -86,7 +86,7 @@ enum { #define MANAGEMENT_COMMAND 0x0 }; -/* CCI Message Format CXL r3.0 Figure 7-19 */ +/* CCI Message Format CXL r3.1 Figure 7-19 */ typedef struct CXLCCIMessage { uint8_t category; #define CXL_CCI_CAT_REQ 0 @@ -342,7 +342,7 @@ static CXLRetCode cmd_events_set_interrupt_policy(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -/* CXL r3.0 section 8.2.9.1.1: Identify (Opcode 0001h) */ +/* CXL r3.1 section 8.2.9.1.1: Identify (Opcode 0001h) */ static CXLRetCode cmd_infostat_identify(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -403,7 +403,7 @@ static void cxl_set_dsp_active_bm(PCIBus *b, PCIDevice *d, } } -/* CXL r3 8.2.9.1.1 */ +/* CXL r3.1 Section 7.6.7.1.1: Identify Switch Device (Opcode 5100h) */ static CXLRetCode cmd_identify_switch_device(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -455,7 +455,7 @@ static CXLRetCode cmd_identify_switch_device(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -/* CXL r3.0 Section 7.6.7.1.2: Get Physical Port State (Opcode 5101h) */ +/* CXL r3.1 Section 7.6.7.1.2: Get Physical Port State (Opcode 5101h) */ static CXLRetCode cmd_get_physical_port_state(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -463,14 +463,14 @@ static CXLRetCode cmd_get_physical_port_state(const struct cxl_cmd *cmd, size_t *len_out, CXLCCI *cci) { - /* CXL r3.0 Table 7-18: Get Physical Port State Request Payload */ + /* CXL r3.1 Table 7-17: Get Physical Port State Request Payload */ struct cxl_fmapi_get_phys_port_state_req_pl { uint8_t num_ports; uint8_t ports[]; } QEMU_PACKED *in; /* - * CXL r3.0 Table 7-20: Get Physical Port State 
Port Information Block + * CXL r3.1 Table 7-19: Get Physical Port State Port Information Block * Format */ struct cxl_fmapi_port_state_info_block { @@ -491,7 +491,7 @@ static CXLRetCode cmd_get_physical_port_state(const struct cxl_cmd *cmd, uint8_t supported_ld_count; } QEMU_PACKED; - /* CXL r3.0 Table 7-19: Get Physical Port State Response Payload */ + /* CXL r3.1 Table 7-18: Get Physical Port State Response Payload */ struct cxl_fmapi_get_phys_port_state_resp_pl { uint8_t num_ports; uint8_t rsv1[3]; @@ -579,7 +579,7 @@ static CXLRetCode cmd_get_physical_port_state(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -/* CXL r3.0 8.2.9.1.2 */ +/* CXL r3.1 Section 8.2.9.1.2: Background Operation Status (Opcode 0002h) */ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -609,7 +609,7 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -/* 8.2.9.2.1 */ +/* CXL r3.1 Section 8.2.9.3.1: Get FW Info (Opcode 0200h) */ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len, @@ -647,7 +647,7 @@ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -/* 8.2.9.3.1 */ +/* CXL r3.1 Section 8.2.9.4.1: Get Timestamp (Opcode 0300h) */ static CXLRetCode cmd_timestamp_get(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -664,7 +664,7 @@ static CXLRetCode cmd_timestamp_get(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -/* 8.2.9.3.2 */ +/* CXL r3.1 Section 8.2.9.4.2: Set Timestamp (Opcode 0301h) */ static CXLRetCode cmd_timestamp_set(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -683,13 +683,13 @@ static CXLRetCode cmd_timestamp_set(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -/* CXL 3.0 8.2.9.5.2.1 Command Effects Log (CEL) */ +/* CXL r3.1 Section 8.2.9.5.2.1: Command Effects Log (CEL) */ static const QemuUUID cel_uuid = { .data = 
UUID(0x0da9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, 0x96, 0xb1, 0x62, 0x3b, 0x3f, 0x17) }; -/* 8.2.9.4.1 */ +/* CXL r3.1 Section 8.2.9.5.1: Get Supported Logs (Opcode 0400h) */ static CXLRetCode cmd_logs_get_supported(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -715,7 +715,7 @@ static CXLRetCode cmd_logs_get_supported(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -/* 8.2.9.4.2 */ +/* CXL r3.1 Section 8.2.9.5.2: Get Log (Opcode 0401h) */ static CXLRetCode cmd_logs_get_log(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -732,14 +732,11 @@ static CXLRetCode cmd_logs_get_log(const struct cxl_cmd *cmd, get_log = (void *)payload_in; /* - * 8.2.9.4.2 - * The device shall return Invalid Parameter if the Offset or Length + * CXL r3.1 Section 8.2.9.5.2: Get Log (Opcode 0401h) + * The device shall return Invalid Input if the Offset or Length * fields attempt to access beyond the size of the log as reported by Get * Supported Logs. * - * XXX: Spec is wrong, "Invalid Parameter" isn't a thing. - * XXX: Spec doesn't address incorrect UUID incorrectness. - * * The CEL buffer is large enough to fit all commands in the emulation, so * the only possible failure would be if the mailbox itself isn't big * enough. 
@@ -749,7 +746,7 @@ static CXLRetCode cmd_logs_get_log(const struct cxl_cmd *cmd, } if (!qemu_uuid_is_equal(&get_log->uuid, &cel_uuid)) { - return CXL_MBOX_UNSUPPORTED; + return CXL_MBOX_INVALID_LOG; } /* Store off everything to local variables so we can wipe out the payload */ @@ -760,7 +757,7 @@ static CXLRetCode cmd_logs_get_log(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -/* 8.2.9.5.1.1 */ +/* CXL r3.1 Section 8.2.9.9.1.1: Identify Memory Device (Opcode 4000h) */ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -815,6 +812,7 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* CXL r3.1 Section 8.2.9.9.2.1: Get Partition Info (Opcode 4100h) */ static CXLRetCode cmd_ccls_get_partition_info(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -851,6 +849,7 @@ static CXLRetCode cmd_ccls_get_partition_info(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* CXL r3.1 Section 8.2.9.9.2.3: Get LSA (Opcode 4102h) */ static CXLRetCode cmd_ccls_get_lsa(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -879,6 +878,7 @@ static CXLRetCode cmd_ccls_get_lsa(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* CXL r3.1 Section 8.2.9.9.2.4: Set LSA (Opcode 4103h) */ static CXLRetCode cmd_ccls_set_lsa(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -940,7 +940,7 @@ static void __do_sanitization(CXLType3Dev *ct3d) } /* - * CXL 3.0 spec section 8.2.9.8.5.1 - Sanitize. + * CXL r3.1 Section 8.2.9.9.5.1: Sanitize (Opcode 4400h) * * Once the Sanitize command has started successfully, the device shall be * placed in the media disabled state. 
If the command fails or is interrupted @@ -1018,7 +1018,10 @@ static CXLRetCode cmd_get_security_state(const struct cxl_cmd *cmd, *len_out = 4; return CXL_MBOX_SUCCESS; } + /* + * CXL r3.1 Section 8.2.9.9.4.1: Get Poison List (Opcode 4300h) + * * This is very inefficient, but good enough for now! * Also the payload will always fit, so no need to handle the MORE flag and * make this stateful. We may want to allow longer poison lists to aid @@ -1103,6 +1106,7 @@ static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* CXL r3.1 Section 8.2.9.9.4.2: Inject Poison (Opcode 4301h) */ static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -1146,6 +1150,7 @@ static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* CXL r3.1 Section 8.2.9.9.4.3: Clear Poison (Opcode 4302h) */ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 71fcb44613..e8801805b9 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -319,7 +319,7 @@ static void build_dvsecs(CXLType3Dev *ct3d) cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, PCIE_CXL_DEVICE_DVSEC_LENGTH, PCIE_CXL_DEVICE_DVSEC, - PCIE_CXL2_DEVICE_DVSEC_REVID, dvsec); + PCIE_CXL31_DEVICE_DVSEC_REVID, dvsec); dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ .rsvd = 0, @@ -346,9 +346,9 @@ static void build_dvsecs(CXLType3Dev *ct3d) .rcvd_mod_ts_data_phase1 = 0xef, /* WTF?
*/ }; cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, - PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0, + PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, PCIE_FLEXBUS_PORT_DVSEC, - PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec); + PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec); } static void hdm_decoder_commit(CXLType3Dev *ct3d, int which) diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c index 405a133eef..742da07a01 100644 --- a/hw/pci-bridge/cxl_downstream.c +++ b/hw/pci-bridge/cxl_downstream.c @@ -109,9 +109,9 @@ static void build_dvsecs(CXLComponentState *cxl) .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? */ }; cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT, - PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0, + PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, PCIE_FLEXBUS_PORT_DVSEC, - PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec); + PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec); dvsec = (uint8_t *)&(CXLDVSECPortGPF){ .rsvd = 0, diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c index 8f97697631..62f96994eb 100644 --- a/hw/pci-bridge/cxl_root_port.c +++ b/hw/pci-bridge/cxl_root_port.c @@ -129,9 +129,9 @@ static void build_dvsecs(CXLComponentState *cxl) .rcvd_mod_ts_data_phase1 = 0xef, }; cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT, - PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0, + PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, PCIE_FLEXBUS_PORT_DVSEC, - PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec); + PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec); dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ .rsvd = 0, diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c index d5341b530f..e87eb40177 100644 --- a/hw/pci-bridge/cxl_upstream.c +++ b/hw/pci-bridge/cxl_upstream.c @@ -121,9 +121,9 @@ static void build_dvsecs(CXLComponentState *cxl) .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? 
*/ }; cxl_component_create_dvsec(cxl, CXL2_UPSTREAM_PORT, - PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0, + PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, PCIE_FLEXBUS_PORT_DVSEC, - PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec); + PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec); dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ .rsvd = 0, diff --git a/include/hw/cxl/cxl_cdat.h b/include/hw/cxl/cxl_cdat.h index 7f67638685..8e3d094608 100644 --- a/include/hw/cxl/cxl_cdat.h +++ b/include/hw/cxl/cxl_cdat.h @@ -16,17 +16,17 @@ /* * Reference: * Coherent Device Attribute Table (CDAT) Specification, Rev. 1.03, July. 2022 - * Compute Express Link (CXL) Specification, Rev. 3.0, Aug. 2022 + * Compute Express Link (CXL) Specification, Rev. 3.1, Aug. 2023 */ -/* Table Access DOE - CXL r3.0 8.1.11 */ +/* Table Access DOE - CXL r3.1 8.1.11 */ #define CXL_DOE_TABLE_ACCESS 2 #define CXL_DOE_PROTOCOL_CDAT ((CXL_DOE_TABLE_ACCESS << 16) | CXL_VENDOR_ID) -/* Read Entry - CXL r3.0 8.1.11.1 */ +/* Read Entry - CXL r3.1 8.1.11.1 */ #define CXL_DOE_TAB_TYPE_CDAT 0 #define CXL_DOE_TAB_ENT_MAX 0xFFFF -/* Read Entry Request - CXL r3.0 8.1.11.1 Table 8-13 */ +/* Read Entry Request - CXL r3.1 8.1.11.1 Table 8-13 */ #define CXL_DOE_TAB_REQ 0 typedef struct CDATReq { DOEHeader header; @@ -35,7 +35,7 @@ typedef struct CDATReq { uint16_t entry_handle; } QEMU_PACKED CDATReq; -/* Read Entry Response - CXL r3.0 8.1.11.1 Table 8-14 */ +/* Read Entry Response - CXL r3.1 8.1.11.1 Table 8-14 */ #define CXL_DOE_TAB_RSP 0 typedef struct CDATRsp { DOEHeader header; diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index b5da72b789..0e5d35c263 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -10,7 +10,7 @@ #ifndef CXL_COMPONENT_H #define CXL_COMPONENT_H -/* CXL 2.0 - 8.2.4 */ +/* CXL r3.1 Section 8.2.4: CXL.cache and CXL.mem Registers */ #define CXL2_COMPONENT_IO_REGION_SIZE 0x1000 #define CXL2_COMPONENT_CM_REGION_SIZE 0x1000 #define CXL2_COMPONENT_BLOCK_SIZE 0x10000 @@ -34,10 
+34,11 @@ enum reg_type { * Capability registers are defined at the top of the CXL.cache/mem region and * are packed. For our purposes we will always define the caps in the same * order. - * CXL 2.0 - 8.2.5 Table 142 for details. + * CXL r3.1 Table 8-22: CXL_CAPABILITY_ID Assignment for details. */ -/* CXL 2.0 - 8.2.5.1 */ +/* CXL r3.1 Section 8.2.4.1: CXL Capability Header Register */ +#define CXL_CAPABILITY_VERSION 1 REG32(CXL_CAPABILITY_HEADER, 0) FIELD(CXL_CAPABILITY_HEADER, ID, 0, 16) FIELD(CXL_CAPABILITY_HEADER, VERSION, 16, 4) @@ -102,12 +103,13 @@ REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18) #define CXL_RAS_ERR_HEADER_NUM 32 /* Offset 0x18 - 0x58 reserved for RAS logs */ -/* 8.2.5.10 - CXL Security Capability Structure */ +/* CXL r3.1 Section 8.2.4.18: CXL Security Capability Structure */ #define CXL_SEC_REGISTERS_OFFSET \ (CXL_RAS_REGISTERS_OFFSET + CXL_RAS_REGISTERS_SIZE) #define CXL_SEC_REGISTERS_SIZE 0 /* We don't implement 1.1 downstream ports */ /* CXL r3.1 Section 8.2.4.19: CXL Link Capability Structure */ +#define CXL_LINK_CAPABILITY_VERSION 2 #define CXL_LINK_REGISTERS_OFFSET \ (CXL_SEC_REGISTERS_OFFSET + CXL_SEC_REGISTERS_SIZE) #define CXL_LINK_REGISTERS_SIZE 0x50 @@ -175,18 +177,24 @@ HDM_DECODER_INIT(1); HDM_DECODER_INIT(2); HDM_DECODER_INIT(3); -/* 8.2.5.13 - CXL Extended Security Capability Structure (Root complex only) */ +/* + * CXL r3.1 Section 8.2.4.21: CXL Extended Security Capability Structure + * (Root complex only) + */ #define EXTSEC_ENTRY_MAX 256 +#define CXL_EXTSEC_CAP_VERSION 2 #define CXL_EXTSEC_REGISTERS_OFFSET \ (CXL_HDM_REGISTERS_OFFSET + CXL_HDM_REGISTERS_SIZE) #define CXL_EXTSEC_REGISTERS_SIZE (8 * EXTSEC_ENTRY_MAX + 4) -/* 8.2.5.14 - CXL IDE Capability Structure */ +/* CXL r3.1 Section 8.2.4.22: CXL IDE Capability Structure */ +#define CXL_IDE_CAP_VERSION 2 #define CXL_IDE_REGISTERS_OFFSET \ (CXL_EXTSEC_REGISTERS_OFFSET + CXL_EXTSEC_REGISTERS_SIZE) -#define CXL_IDE_REGISTERS_SIZE 0x20 +#define 
CXL_IDE_REGISTERS_SIZE 0x24 -/* 8.2.5.15 - CXL Snoop Filter Capability Structure */ +/* CXL r3.1 Section 8.2.4.23 - CXL Snoop Filter Capability Structure */ +#define CXL_SNOOP_CAP_VERSION 1 #define CXL_SNOOP_REGISTERS_OFFSET \ (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE) #define CXL_SNOOP_REGISTERS_SIZE 0x8 @@ -202,7 +210,7 @@ typedef struct component_registers { MemoryRegion component_registers; /* - * 8.2.4 Table 141: + * CXL r3.1 Table 8-21: CXL Subsystem Component Register Ranges * 0x0000 - 0x0fff CXL.io registers * 0x1000 - 0x1fff CXL.cache and CXL.mem * 0x2000 - 0xdfff Implementation specific diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 9f51c586d7..d8e184c4ba 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -58,18 +58,30 @@ * */ -#define CXL_DEVICE_CAP_HDR1_OFFSET 0x10 /* Figure 138 */ -#define CXL_DEVICE_CAP_REG_SIZE 0x10 /* 8.2.8.2 */ -#define CXL_DEVICE_CAPS_MAX 4 /* 8.2.8.2.1 + 8.2.8.5 */ +/* CXL r3.1 Figure 8-12: CXL Device Registers */ +#define CXL_DEVICE_CAP_HDR1_OFFSET 0x10 +/* CXL r3.1 Section 8.2.8.2: CXL Device Capability Header Register */ +#define CXL_DEVICE_CAP_REG_SIZE 0x10 + +/* + * CXL r3.1 Section 8.2.8.2.1: CXL Device Capabilities + + * CXL r3.1 Section 8.2.8.5: Memory Device Capabilities + */ +#define CXL_DEVICE_CAPS_MAX 4 #define CXL_CAPS_SIZE \ (CXL_DEVICE_CAP_REG_SIZE * (CXL_DEVICE_CAPS_MAX + 1)) /* +1 for header */ #define CXL_DEVICE_STATUS_REGISTERS_OFFSET 0x80 /* Read comment above */ -#define CXL_DEVICE_STATUS_REGISTERS_LENGTH 0x8 /* 8.2.8.3.1 */ +/* + * CXL r3.1 Section 8.2.8.3: Device Status Registers + * As it is the only Device Status Register in CXL r3.1 + */ +#define CXL_DEVICE_STATUS_REGISTERS_LENGTH 0x8 #define CXL_MAILBOX_REGISTERS_OFFSET \ (CXL_DEVICE_STATUS_REGISTERS_OFFSET + CXL_DEVICE_STATUS_REGISTERS_LENGTH) -#define CXL_MAILBOX_REGISTERS_SIZE 0x20 /* 8.2.8.4, Figure 139 */ +/* CXL r3.1 Figure 8-13: Mailbox Registers */ +#define 
CXL_MAILBOX_REGISTERS_SIZE 0x20 #define CXL_MAILBOX_PAYLOAD_SHIFT 11 #define CXL_MAILBOX_MAX_PAYLOAD_SIZE (1 << CXL_MAILBOX_PAYLOAD_SHIFT) #define CXL_MAILBOX_REGISTERS_LENGTH \ @@ -83,7 +95,7 @@ (CXL_DEVICE_CAP_REG_SIZE + CXL_DEVICE_STATUS_REGISTERS_LENGTH + \ CXL_MAILBOX_REGISTERS_LENGTH + CXL_MEMORY_DEVICE_REGISTERS_LENGTH) -/* 8.2.8.4.5.1 Command Return Codes */ +/* CXL r3.1 Table 8-34: Command Return Codes */ typedef enum { CXL_MBOX_SUCCESS = 0x0, CXL_MBOX_BG_STARTED = 0x1, @@ -108,7 +120,17 @@ typedef enum { CXL_MBOX_INCORRECT_PASSPHRASE = 0x14, CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15, CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16, - CXL_MBOX_MAX = 0x17 + CXL_MBOX_INVALID_LOG = 0x17, + CXL_MBOX_INTERRUPTED = 0x18, + CXL_MBOX_UNSUPPORTED_FEATURE_VERSION = 0x19, + CXL_MBOX_UNSUPPORTED_FEATURE_SELECTION_VALUE = 0x1a, + CXL_MBOX_FEATURE_TRANSFER_IN_PROGRESS = 0x1b, + CXL_MBOX_FEATURE_TRANSFER_OUT_OF_ORDER = 0x1c, + CXL_MBOX_RESOURCES_EXHAUSTED = 0x1d, + CXL_MBOX_INVALID_EXTENT_LIST = 0x1e, + CXL_MBOX_TRANSFER_OUT_OF_ORDER = 0x1f, + CXL_MBOX_REQUEST_ABORT_NOTSUP = 0x20, + CXL_MBOX_MAX = 0x20 } CXLRetCode; typedef struct CXLCCI CXLCCI; @@ -169,7 +191,7 @@ typedef struct CXLCCI { typedef struct cxl_device_state { MemoryRegion device_registers; - /* mmio for device capabilities array - 8.2.8.2 */ + /* CXL r3.1 Section 8.2.8.3: Device Status Registers */ struct { MemoryRegion device; union { @@ -231,7 +253,7 @@ void cxl_device_register_init_t3(CXLType3Dev *ct3d); void cxl_device_register_init_swcci(CSWMBCCIDev *sw); /* - * CXL 2.0 - 8.2.8.1 including errata F4 + * CXL r3.1 Section 8.2.8.1: CXL Device Capabilities Array Register * Documented as a 128 bit register, but 64 bit accesses and the second * 64 bits are currently reserved. */ @@ -246,17 +268,18 @@ void cxl_event_set_status(CXLDeviceState *cxl_dstate, CXLEventLogType log_type, /* * Helper macro to initialize capability headers for CXL devices. 
* * - * In the 8.2.8.2, this is listed as a 128b register, but in 8.2.8, it says: + * In CXL r3.1 Section 8.2.8.2: CXL Device Capability Header Register, this is + * listed as a 128b register, but in CXL r3.1 Section 8.2.8: CXL Device Register + * Interface, it says: * > No registers defined in Section 8.2.8 are larger than 64-bits wide so that * > is the maximum access size allowed for these registers. If this rule is not - * > followed, the behavior is undefined + * > followed, the behavior is undefined. * - * CXL 2.0 Errata F4 states further that the layouts in the specification are - * shown as greater than 128 bits, but implementations are expected to - * use any size of access up to 64 bits. + * > To illustrate how the fields fit together, the layouts ... are shown as + * > wider than a 64 bit register. Implementations are expected to use any size + * > accesses for this information up to 64 bits without loss of functionality * - * Here we've chosen to make it 4 dwords. The spec allows any pow2 multiple - * access to be used for a register up to 64 bits. + * Here we've chosen to make it 4 dwords.
*/ #define CXL_DEVICE_CAPABILITY_HEADER_REGISTER(n, offset) \ REG32(CXL_DEV_##n##_CAP_HDR0, offset) \ @@ -306,7 +329,8 @@ void cxl_initialize_t3_ld_cci(CXLCCI *cci, DeviceState *d, CAP_LENGTH, CXL_##reg##_REGISTERS_LENGTH); \ } while (0) -/* CXL 3.0 8.2.8.3.1 Event Status Register */ +/* CXL r3.1 Section 8.2.8.3.1: Event Status Register */ +#define CXL_DEVICE_STATUS_VERSION 2 REG64(CXL_DEV_EVENT_STATUS, 0) FIELD(CXL_DEV_EVENT_STATUS, EVENT_STATUS, 0, 32) @@ -348,6 +372,8 @@ REG64(CXL_DEV_BG_CMD_STS, 0x18) /* CXL r3.1 Section 8.2.8.4.8: Command Payload Registers */ REG32(CXL_DEV_CMD_PAYLOAD, 0x20) +/* CXL r3.1 Section 8.2.8.4.1: Memory Device Status Registers */ +#define CXL_MEM_DEV_STATUS_VERSION 1 REG64(CXL_MEM_DEV_STS, 0) FIELD(CXL_MEM_DEV_STS, FATAL, 0, 1) FIELD(CXL_MEM_DEV_STS, FW_HALT, 1, 1) diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h index d778487b7e..5170b8dbf8 100644 --- a/include/hw/cxl/cxl_events.h +++ b/include/hw/cxl/cxl_events.h @@ -13,7 +13,7 @@ #include "qemu/uuid.h" /* - * CXL rev 3.0 section 8.2.9.2.2; Table 8-49 + * CXL r3.1 section 8.2.9.2.2: Get Event Records (Opcode 0100h); Table 8-52 * * Define these as the bit position for the event status register for ease of * setting the status.
@@ -29,7 +29,7 @@ typedef enum CXLEventLogType { /* * Common Event Record Format - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + * CXL r3.1 section 8.2.9.2.1: Event Records; Table 8-43 */ #define CXL_EVENT_REC_HDR_RES_LEN 0xf typedef struct CXLEventRecordHdr { @@ -52,7 +52,7 @@ typedef struct CXLEventRecordRaw { /* * Get Event Records output payload - * CXL rev 3.0 section 8.2.9.2.2; Table 8-50 + * CXL r3.1 section 8.2.9.2.2; Table 8-53 */ #define CXL_GET_EVENT_FLAG_OVERFLOW BIT(0) #define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1) @@ -70,7 +70,7 @@ typedef struct CXLGetEventPayload { /* * Clear Event Records input payload - * CXL rev 3.0 section 8.2.9.2.3; Table 8-51 + * CXL r3.1 section 8.2.9.2.3; Table 8-54 */ typedef struct CXLClearEventPayload { uint8_t event_log; /* CXLEventLogType */ @@ -80,10 +80,10 @@ typedef struct CXLClearEventPayload { uint16_t handle[]; } CXLClearEventPayload; -/** +/* * Event Interrupt Policy * - * CXL rev 3.0 section 8.2.9.2.4; Table 8-52 + * CXL r3.1 section 8.2.9.2.4; Table 8-55 */ typedef enum CXLEventIntMode { CXL_INT_NONE = 0x00, @@ -106,7 +106,7 @@ typedef struct CXLEventInterruptPolicy { /* * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + * CXL r3.1 Section 8.2.9.2.1.1; Table 8-45 */ #define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 #define CXL_EVENT_GEN_MED_RES_SIZE 0x2e @@ -126,7 +126,7 @@ typedef struct CXLEventGenMedia { /* * DRAM Event Record - * CXL Rev 3.0 Section 8.2.9.2.1.2: Table 8-44 + * CXL r3.1 Section 8.2.9.2.1.2: Table 8-46 * All fields little endian. */ typedef struct CXLEventDram { @@ -149,7 +149,7 @@ typedef struct CXLEventDram { /* * Memory Module Event Record - * CXL Rev 3.0 Section 8.2.9.2.1.3: Table 8-45 + * CXL r3.1 Section 8.2.9.2.1.3: Table 8-47 * All fields little endian. 
*/ typedef struct CXLEventMemoryModule { diff --git a/include/hw/cxl/cxl_pci.h b/include/hw/cxl/cxl_pci.h index ddf01a543b..265db6c407 100644 --- a/include/hw/cxl/cxl_pci.h +++ b/include/hw/cxl/cxl_pci.h @@ -16,9 +16,8 @@ #define PCIE_DVSEC_HEADER1_OFFSET 0x4 /* Offset from start of extend cap */ #define PCIE_DVSEC_ID_OFFSET 0x8 -#define PCIE_CXL_DEVICE_DVSEC_LENGTH 0x38 -#define PCIE_CXL1_DEVICE_DVSEC_REVID 0 -#define PCIE_CXL2_DEVICE_DVSEC_REVID 1 +#define PCIE_CXL_DEVICE_DVSEC_LENGTH 0x3C +#define PCIE_CXL31_DEVICE_DVSEC_REVID 3 #define EXTENSIONS_PORT_DVSEC_LENGTH 0x28 #define EXTENSIONS_PORT_DVSEC_REVID 0 @@ -29,8 +28,8 @@ #define GPF_DEVICE_DVSEC_LENGTH 0x10 #define GPF_DEVICE_DVSEC_REVID 0 -#define PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0 0x14 -#define PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0 1 +#define PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH 0x20 +#define PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID 2 #define REG_LOC_DVSEC_LENGTH 0x24 #define REG_LOC_DVSEC_REVID 0 @@ -55,16 +54,26 @@ typedef struct DVSECHeader { QEMU_BUILD_BUG_ON(sizeof(DVSECHeader) != 10); /* - * CXL 2.0 devices must implement certain DVSEC IDs, and can [optionally] + * CXL r3.1 Table 8-2: CXL DVSEC ID Assignment + * Devices must implement certain DVSEC IDs, and can [optionally] * implement others. + * (x) - IDs in Table 8-2. 
* - * CXL 2.0 Device: 0, [2], 5, 8 - * CXL 2.0 RP: 3, 4, 7, 8 - * CXL 2.0 Upstream Port: [2], 7, 8 - * CXL 2.0 Downstream Port: 3, 4, 7, 8 + * CXL RCD (D1): 0, [2], [5], 7, [8], A - Not emulated yet + * CXL RCD USP (UP1): 7, [8] - Not emulated yet + * CXL RCH DSP (DP1): 7, [8] + * CXL SLD (D2): 0, [2], 5, 7, 8, [A] + * CXL LD (LD): 0, [2], 5, 7, 8 + * CXL RP (R): 3, 4, 7, 8 + * CXL Switch USP (USP): [2], 7, 8 + * CXL Switch DSP (DSP): 3, 4, 7, 8 + * FM-Owned LD (FMLD): 0, [2], 7, 8, 9 */ -/* CXL 2.0 - 8.1.3 (ID 0001) */ +/* + * CXL r3.1 Section 8.1.3: PCIe DVSEC for Devices + * DVSEC ID: 0, Revision: 3 + */ typedef struct CXLDVSECDevice { DVSECHeader hdr; uint16_t cap; @@ -82,10 +91,14 @@ typedef struct CXLDVSECDevice { uint32_t range2_size_lo; uint32_t range2_base_hi; uint32_t range2_base_lo; -} CXLDVSECDevice; -QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDevice) != 0x38); + uint16_t cap3; +} QEMU_PACKED CXLDVSECDevice; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDevice) != 0x3A); -/* CXL 2.0 - 8.1.5 (ID 0003) */ +/* + * CXL r3.1 Section 8.1.5: CXL Extensions DVSEC for Ports + * DVSEC ID: 3, Revision: 0 + */ typedef struct CXLDVSECPortExt { DVSECHeader hdr; uint16_t status; @@ -107,7 +120,10 @@ QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortExt) != 0x28); #define PORT_CONTROL_UNMASK_SBR 1 #define PORT_CONTROL_ALT_MEMID_EN 4 -/* CXL 2.0 - 8.1.6 GPF DVSEC (ID 0004) */ +/* + * CXL r3.1 Section 8.1.6: GPF DVSEC for CXL Port + * DVSEC ID: 4, Revision: 0 + */ typedef struct CXLDVSECPortGPF { DVSECHeader hdr; uint16_t rsvd; @@ -116,7 +132,10 @@ typedef struct CXLDVSECPortGPF { } CXLDVSECPortGPF; QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortGPF) != 0x10); -/* CXL 2.0 - 8.1.7 GPF DVSEC for CXL Device */ +/* + * CXL r3.1 Section 8.1.7: GPF DVSEC for CXL Device + * DVSEC ID: 5, Revision 0 + */ typedef struct CXLDVSECDeviceGPF { DVSECHeader hdr; uint16_t phase2_duration; @@ -124,17 +143,27 @@ typedef struct CXLDVSECDeviceGPF { } CXLDVSECDeviceGPF; QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDeviceGPF) != 0x10); -/* CXL 2.0 - 
8.1.8/8.2.1.3 Flex Bus DVSEC (ID 0007) */ +/* + * CXL r3.1 Section 8.1.8: PCIe DVSEC for Flex Bus Port + * CXL r3.1 Section 8.2.1.3: Flex Bus Port DVSEC + * DVSEC ID: 7, Revision 2 + */ typedef struct CXLDVSECPortFlexBus { DVSECHeader hdr; uint16_t cap; uint16_t ctrl; uint16_t status; uint32_t rcvd_mod_ts_data_phase1; + uint32_t cap2; + uint32_t ctrl2; + uint32_t status2; } CXLDVSECPortFlexBus; -QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortFlexBus) != 0x14); +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortFlexBus) != 0x20); -/* CXL 2.0 - 8.1.9 Register Locator DVSEC (ID 0008) */ +/* + * CXL r3.1 Section 8.1.9: Register Locator DVSEC + * DVSEC ID: 8, Revision 0 + */ typedef struct CXLDVSECRegisterLocator { DVSECHeader hdr; uint16_t rsvd; From 574b64aa6754ba491f51024c5a823a674d48a658 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 29 Jan 2024 10:39:21 +0300 Subject: [PATCH 54/60] virtio-gpu: Correct virgl_renderer_resource_get_info() error check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit virgl_renderer_resource_get_info() returns errno and not -1 on error. Correct the return-value check. Reviewed-by: Marc-André Lureau Signed-off-by: Dmitry Osipenko Message-Id: <20240129073921.446869-1-dmitry.osipenko@collabora.com> Cc: qemu-stable@nongnu.org Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- contrib/vhost-user-gpu/virgl.c | 6 +++--- hw/display/virtio-gpu-virgl.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c index d1ccdf7d06..51da0e3667 100644 --- a/contrib/vhost-user-gpu/virgl.c +++ b/contrib/vhost-user-gpu/virgl.c @@ -327,7 +327,7 @@ virgl_get_resource_info_modifiers(uint32_t resource_id, #ifdef VIRGL_RENDERER_RESOURCE_INFO_EXT_VERSION struct virgl_renderer_resource_info_ext info_ext; ret = virgl_renderer_resource_get_info_ext(resource_id, &info_ext); - if (ret < 0) { + if (ret) { return ret; } @@ -335,7 +335,7 @@ virgl_get_resource_info_modifiers(uint32_t resource_id, *modifiers = info_ext.modifiers; #else ret = virgl_renderer_resource_get_info(resource_id, info); - if (ret < 0) { + if (ret) { return ret; } @@ -372,7 +372,7 @@ virgl_cmd_set_scanout(VuGpu *g, uint64_t modifiers = 0; ret = virgl_get_resource_info_modifiers(ss.resource_id, &info, &modifiers); - if (ret == -1) { + if (ret) { g_critical("%s: illegal resource specified %d\n", __func__, ss.resource_id); cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 8bb7a2c21f..9f34d0e661 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -181,7 +181,7 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g, memset(&info, 0, sizeof(info)); ret = virgl_renderer_resource_get_info(ss.resource_id, &info); #endif - if (ret == -1) { + if (ret) { qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n", __func__, ss.resource_id); From e8ddec58053e9361b2cc18ec6d17b6c95590bf3c Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Mon, 29 Jan 2024 17:03:07 +0900 Subject: [PATCH 55/60] hw/smbios: Fix OEM strings table option validation qemu_smbios_type11_opts did not have the list terminator and that resulted in out-of-bound memory access. It also needs to have an element for the type option. 
Cc: qemu-stable@nongnu.org Fixes: 2d6dcbf93fb0 ("smbios: support setting OEM strings table") Signed-off-by: Akihiko Odaki Reviewed-by: Michael Tokarev Message-Id: <20240129-smbios-v2-1-9ee6fede0d10@daynix.com> Reviewed-by: Ani Sinha Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/smbios/smbios.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index c0c5a81e66..a9048c1342 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -375,6 +375,11 @@ static const QemuOptDesc qemu_smbios_type8_opts[] = { }; static const QemuOptDesc qemu_smbios_type11_opts[] = { + { + .name = "type", + .type = QEMU_OPT_NUMBER, + .help = "SMBIOS element type", + }, { .name = "value", .type = QEMU_OPT_STRING, @@ -385,6 +390,7 @@ static const QemuOptDesc qemu_smbios_type11_opts[] = { .type = QEMU_OPT_STRING, .help = "OEM string data from file", }, + { /* end of list */ } }; static const QemuOptDesc qemu_smbios_type17_opts[] = { From 33b081e2947db6117b27f8c76544a756053f5514 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Mon, 29 Jan 2024 17:03:08 +0900 Subject: [PATCH 56/60] hw/smbios: Fix port connector option validation qemu_smbios_type8_opts did not have the list terminator and that resulted in out-of-bound memory access. It also needs to have an element for the type option. Cc: qemu-stable@nongnu.org Fixes: fd8caa253c56 ("hw/smbios: support for type 8 (port connector)") Signed-off-by: Akihiko Odaki Reviewed-by: Michael Tokarev Message-Id: <20240129-smbios-v2-2-9ee6fede0d10@daynix.com> Reviewed-by: Ani Sinha Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/smbios/smbios.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index a9048c1342..a3c4e52ce9 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -352,6 +352,11 @@ static const QemuOptDesc qemu_smbios_type4_opts[] = { }; static const QemuOptDesc qemu_smbios_type8_opts[] = { + { + .name = "type", + .type = QEMU_OPT_NUMBER, + .help = "SMBIOS element type", + }, { .name = "internal_reference", .type = QEMU_OPT_STRING, @@ -372,6 +377,7 @@ static const QemuOptDesc qemu_smbios_type8_opts[] = { .type = QEMU_OPT_NUMBER, .help = "port type", }, + { /* end of list */ } }; static const QemuOptDesc qemu_smbios_type11_opts[] = { From 8b162082cb2d495f8661b7ee3239a36936dab2ee Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Tue, 30 Jan 2024 16:59:19 +0200 Subject: [PATCH 57/60] hw/display/virtio-gpu.c: use reset_bh class method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the VirtioGPU type has a reset_bh field to specify a reset callback, it's never used. virtio_gpu_reset() calls the general virtio_gpu_reset_bh() function for all devices that inherit from VirtioGPU. While no devices override reset_bh at the moment, a device reset might require special logic for implementations in the future. Reviewed-by: Marc-André Lureau Signed-off-by: Manos Pitsidianakis Message-Id: <87fb4fa72ce5b341a6f957513a00dcb79fd5997f.1706626470.git.manos.pitsidianakis@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/display/virtio-gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index f8a675eb30..2b73ae585b 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1515,7 +1515,7 @@ void virtio_gpu_reset(VirtIODevice *vdev) qemu_cond_wait_bql(&g->reset_cond); } } else { - virtio_gpu_reset_bh(g); + aio_bh_call(g->reset_bh); } while (!QTAILQ_EMPTY(&g->cmdq)) { From 330399bd8935c9c2100c40e168781f405545d05a Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Tue, 30 Jan 2024 16:59:20 +0200 Subject: [PATCH 58/60] virtio-gpu.c: add resource_destroy class method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When destroying/unrefing resources, devices such as virtio-gpu-rutabaga need to do their own bookkeeping (free rutabaga resources that are associated with the virtio_gpu_simple_resource). This commit adds a class method so that virtio-gpu-rutabaga can override it in the next commit. Reviewed-by: Marc-André Lureau Signed-off-by: Manos Pitsidianakis Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/display/virtio-gpu.c | 25 ++++++++++++++++++++++--- include/hw/virtio/virtio-gpu.h | 3 +++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 2b73ae585b..1c1ee230b3 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -402,7 +402,8 @@ static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id) } static void virtio_gpu_resource_destroy(VirtIOGPU *g, - struct virtio_gpu_simple_resource *res) + struct virtio_gpu_simple_resource *res, + Error **errp) { int i; @@ -438,7 +439,11 @@ static void virtio_gpu_resource_unref(VirtIOGPU *g, cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; return; } - virtio_gpu_resource_destroy(g, res); + /* + * virtio_gpu_resource_destroy does not set any errors, so pass a NULL errp + * to ignore them. + */ + virtio_gpu_resource_destroy(g, res, NULL); } static void virtio_gpu_transfer_to_host_2d(VirtIOGPU *g, @@ -1488,11 +1493,24 @@ static void virtio_gpu_device_unrealize(DeviceState *qdev) static void virtio_gpu_reset_bh(void *opaque) { VirtIOGPU *g = VIRTIO_GPU(opaque); + VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g); struct virtio_gpu_simple_resource *res, *tmp; + uint32_t resource_id; + Error *local_err = NULL; int i = 0; QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { - virtio_gpu_resource_destroy(g, res); + resource_id = res->resource_id; + vgc->resource_destroy(g, res, &local_err); + if (local_err) { + error_append_hint(&local_err, "%s: %s resource_destroy" + "for resource_id = %"PRIu32" failed.\n", + __func__, object_get_typename(OBJECT(g)), + resource_id); + /* error_report_err frees the error object for us */ + error_report_err(local_err); + local_err = NULL; + } } for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { @@ -1632,6 +1650,7 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data) vgc->handle_ctrl = virtio_gpu_handle_ctrl; vgc->process_cmd = virtio_gpu_simple_process_cmd; vgc->update_cursor_data 
= virtio_gpu_update_cursor_data; + vgc->resource_destroy = virtio_gpu_resource_destroy; vgbc->gl_flushed = virtio_gpu_handle_gl_flushed; vdc->realize = virtio_gpu_device_realize; diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index 584ba2ed73..b28e7ef0d2 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -219,6 +219,9 @@ struct VirtIOGPUClass { void (*update_cursor_data)(VirtIOGPU *g, struct virtio_gpu_scanout *s, uint32_t resource_id); + void (*resource_destroy)(VirtIOGPU *g, + struct virtio_gpu_simple_resource *res, + Error **errp); }; struct VirtIOGPUGL { From 588a09dace4f9f9163bfdd6426ed3718d166b992 Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Tue, 30 Jan 2024 16:59:21 +0200 Subject: [PATCH 59/60] virtio-gpu-rutabaga.c: override resource_destroy method When the Rutabaga GPU device frees resources, it calls rutabaga_resource_unref for that resource_id. However, when the generic VirtIOGPU functions destroy resources, they only remove the virtio_gpu_simple_resource from the device's VirtIOGPU->reslist list. The rutabaga resource associated with that resource_id is then leaked. This commit overrides the resource_destroy class method introduced in the previous commit to fix this. Signed-off-by: Manos Pitsidianakis Message-Id: Reviewed-by: Gurchetan Singh Reviewed-by: Michael S. 
Tsirkin --- hw/display/virtio-gpu-rutabaga.c | 47 ++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/hw/display/virtio-gpu-rutabaga.c b/hw/display/virtio-gpu-rutabaga.c index 9e67f9bd51..17bf701a21 100644 --- a/hw/display/virtio-gpu-rutabaga.c +++ b/hw/display/virtio-gpu-rutabaga.c @@ -147,15 +147,39 @@ rutabaga_cmd_create_resource_3d(VirtIOGPU *g, QTAILQ_INSERT_HEAD(&g->reslist, res, next); } +static void +virtio_gpu_rutabaga_resource_unref(VirtIOGPU *g, + struct virtio_gpu_simple_resource *res, + Error **errp) +{ + int32_t result; + VirtIOGPURutabaga *vr = VIRTIO_GPU_RUTABAGA(g); + + result = rutabaga_resource_unref(vr->rutabaga, res->resource_id); + if (result) { + error_setg_errno(errp, + (int)result, + "%s: rutabaga_resource_unref returned %"PRIi32 + " for resource_id = %"PRIu32, __func__, result, + res->resource_id); + } + + if (res->image) { + pixman_image_unref(res->image); + } + + QTAILQ_REMOVE(&g->reslist, res, next); + g_free(res); +} + static void rutabaga_cmd_resource_unref(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { - int32_t result; + int32_t result = 0; struct virtio_gpu_simple_resource *res; struct virtio_gpu_resource_unref unref; - - VirtIOGPURutabaga *vr = VIRTIO_GPU_RUTABAGA(g); + Error *local_err = NULL; VIRTIO_GPU_FILL_CMD(unref); @@ -164,15 +188,14 @@ rutabaga_cmd_resource_unref(VirtIOGPU *g, res = virtio_gpu_find_resource(g, unref.resource_id); CHECK(res, cmd); - result = rutabaga_resource_unref(vr->rutabaga, unref.resource_id); - CHECK(!result, cmd); - - if (res->image) { - pixman_image_unref(res->image); + virtio_gpu_rutabaga_resource_unref(g, res, &local_err); + if (local_err) { + error_report_err(local_err); + /* local_err was freed, do not reuse it. 
*/ + local_err = NULL; + result = 1; } - - QTAILQ_REMOVE(&g->reslist, res, next); - g_free(res); + CHECK(!result, cmd); } static void @@ -1099,7 +1122,7 @@ static void virtio_gpu_rutabaga_class_init(ObjectClass *klass, void *data) vgc->handle_ctrl = virtio_gpu_rutabaga_handle_ctrl; vgc->process_cmd = virtio_gpu_rutabaga_process_cmd; vgc->update_cursor_data = virtio_gpu_rutabaga_update_cursor; - + vgc->resource_destroy = virtio_gpu_rutabaga_resource_unref; vdc->realize = virtio_gpu_rutabaga_realize; device_class_set_props(dc, virtio_gpu_rutabaga_properties); } From 1dd6954c3f5c5c610cf94b6f740118e565957293 Mon Sep 17 00:00:00 2001 From: Raphael Norwitz Date: Sat, 3 Feb 2024 21:37:58 -0500 Subject: [PATCH 60/60] MAINTAINERS: Switch to my Enfabrica email MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'd prefer to use my new work email so this change updates MAINTAINERS with it. Signed-off-by: Raphael Norwitz Message-Id: <20240204023758.83191-1-raphael.s.norwitz@gmail.com> Reviewed-by: Alex Bennée Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7a1afb40ac..0bf716db6a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2570,7 +2570,7 @@ F: include/hw/virtio/virtio-gpu.h F: docs/system/devices/virtio-gpu.rst vhost-user-blk -M: Raphael Norwitz +M: Raphael Norwitz S: Maintained F: contrib/vhost-user-blk/ F: contrib/vhost-user-scsi/